Skip to content

Instantly share code, notes, and snippets.

@alexlib
Created January 10, 2013 00:40
Show Gist options
  • Select an option

  • Save alexlib/4498384 to your computer and use it in GitHub Desktop.

Select an option

Save alexlib/4498384 to your computer and use it in GitHub Desktop.
Reads trajAcc.#### files created by the OpenPTV software (3D particle tracking velocimetry) into a Pandas (www.pydata.org) DataFrame and stores it in HDF5 (PyTables) format.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "trajAcc_file_to_h5"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "%config InlineBackend.figure_format = 'svg'\nplt.figsize(10,6)\nfrom pandas import *\nimport pandas as pd\npd.set_option('repr_html', False)\npd.set_option('line_width', 120)\n\nimport time\n# from mpl_toolkits.mplot3d import axes3d\nimport glob\nimport os\nimport numpy as np",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 49
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Define the directory:\ndata_dir = 'test_data'\n# the files are trajAcc.\ndir_path = os.path.join(data_dir, 'trajAcc.*')\n# list the files\nfiles = glob.glob(dir_path)\n# try to read one, see how many columns are:\ntmp = read_csv(files[0],'\\t')\nlines,cols = tmp.shape\n# we need to update the options also to 47 columns and provide correct names\nif cols == 32:\n columns = ['x','y','z','u','v','w','ax','ay','az','ux','uy','uz','vx','vy','vz','wx','wy','wz',\\\n 'ut','vt','wt',\\\n 'daxdx','daxdy','daxdz',\\\n 'daydx','daydy','daydz',\\\n 'dazdx','dazdy','dazdz',\\\n 'id','age']\n cols_to_read = [0,1,2,3,4,5,31] # we are going to use only x,y,z,u,v,w and age\nelif cols == 34:\n columns = ['x','y','z','u','v','w','ax','ay','az','ux','uy','uz','vx','vy','vz','wx','wy','wz',\\\n 'ut','vt','wt',\\\n 'daxdx','daxdy','daxdz',\\\n 'daydx','daydy','daydz',\\\n 'dazdx','dazdy','dazdz',\\\n 'nx','ny','nz','age']\n cols_to_read = [0,1,2,3,4,5,33] # we are going to use only x,y,z,u,v,w and age\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": false,
"input": "def get_data(f):\n \"\"\" Function get_data(filename) reads the columns from the trajAcc file\n adds the extension of the file which is a frame number to the age, creating\n time sequence \"\"\"\n frame = f.split('.')[1]\n df = read_csv(f,sep='\\t',names=columns,usecols=cols_to_read)\n # df['age'].dtype = np.int\n df['t'] = df['age'].astype(np.int) + np.int(frame)\n return df",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": "t = time.time()\n# Using nice trick of generator we create a list of all the data and then concatenate\n# into a single DataFrame object (see pandas tutorials)\ndf1 = concat(get_data(data_file) for data_file in files)\n# print df1.x.count()\nprint('done reading after ... %d sec ' % (time.time() - t) )\n\n ",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "11837\ndone reading after ... 0 sec \n"
}
],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": "# we store the DataFrame into HDF5 table for later use\nstore = HDFStore('tmp.h5')\nstore['df'] = df1\nstore.close()",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": "df1.describe()",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 56,
"text": " x y z u v w age t\ncount 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000\nmean 0.007461 0.002877 -0.050775 0.008799 -0.001952 0.000680 19.658190 10020.199121\nstd 0.003269 0.002449 0.002268 0.005068 0.003025 0.005665 16.776826 16.810213\nmin -0.000795 -0.001697 -0.055244 -0.013424 -0.012890 -0.018831 0.000000 10000.000000\n25% 0.005130 0.000832 -0.052593 0.005572 -0.004035 -0.003723 6.000000 10007.000000\n50% 0.007830 0.002683 -0.050860 0.009434 -0.002110 0.000266 15.000000 10016.000000\n75% 0.010288 0.004845 -0.049000 0.012515 -0.000022 0.004214 29.000000 10029.000000\nmax 0.013838 0.008204 -0.046061 0.020057 0.008560 0.040254 87.000000 10087.000000"
}
],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment