alexlib · January 10, 2013 00:40
diff --git a/trajAcc_to_hdf5.ipynb b/trajAcc_to_hdf5.ipynb
 {
 "metadata": {
  "name": "trajAcc_file_to_h5"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "%config InlineBackend.figure_format = 'svg'\nfrom pandas import *  # kept: later cells call read_csv, concat and HDFStore unqualified\nimport pandas as pd\n\nimport glob\nimport os\nimport time\n\n# plt is imported explicitly: plt.figsize is not part of matplotlib.pyplot and\n# the name plt itself was only defined when IPython ran in pylab mode\nimport matplotlib.pyplot as plt\n# from mpl_toolkits.mplot3d import axes3d\nimport numpy as np\n\n# same effect as the former plt.figsize(10,6), without needing pylab mode\nplt.rcParams['figure.figsize'] = (10, 6)\npd.set_option('repr_html', False)\npd.set_option('line_width', 120)",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 49
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Define the directory:\ndata_dir = 'test_data'\n# the files are named trajAcc.<frame number>\ndir_path = os.path.join(data_dir, 'trajAcc.*')\n# list the files; sorted, because glob returns them in arbitrary order\nfiles = sorted(glob.glob(dir_path))\nif not files:\n    raise IOError('no files matching %s' % dir_path)\n# read one file to see how many columns there are\ntmp = read_csv(files[0], sep='\\t')\nlines, cols = tmp.shape\n# TODO (original author's note): also support 47 columns with correct names\nif cols == 32:\n    last_columns = ['id','age']\nelif cols == 34:\n    last_columns = ['nx','ny','nz','age']\nelse:\n    # stop here instead of failing later in get_data with a NameError\n    raise ValueError('unsupported number of columns: %d (expected 32 or 34)' % cols)\n# the first 30 names are the same in both layouts\ncolumns = ['x','y','z','u','v','w','ax','ay','az',\n           'ux','uy','uz','vx','vy','vz','wx','wy','wz',\n           'ut','vt','wt',\n           'daxdx','daxdy','daxdz',\n           'daydx','daydy','daydz',\n           'dazdx','dazdy','dazdz'] + last_columns\n# we are going to use only x,y,z,u,v,w and age (age is the last column in both layouts)\ncols_to_read = [0,1,2,3,4,5,cols - 1]\n",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 50
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "def get_data(f):\n    \"\"\"Read the selected columns of one trajAcc file and add a time column.\n\n    The text after the first dot of the file name is taken as the frame\n    number and added to ``age`` to build the time sequence ``t``.\n\n    f -- path of a tab-separated trajAcc file (trajAcc.<frame>)\n\n    Returns a DataFrame with the columns picked by the notebook-level\n    ``cols_to_read`` (named from ``columns``) plus the column ``t``.\n    \"\"\"\n    # split only the file name, so a dot in the directory part of the path\n    # (e.g. './test_data/trajAcc.10000') cannot be mistaken for the extension\n    frame = int(os.path.basename(f).split('.')[1])\n    df = read_csv(f, sep='\\t', names=columns, usecols=cols_to_read)\n    # builtin int instead of np.int: the alias was removed in NumPy 1.24\n    df['t'] = df['age'].astype(int) + frame\n    return df",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 52
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Read every file through a generator expression and let pandas join the\n# per-file frames into a single DataFrame (see pandas tutorials)\nt = time.time()\nframes = (get_data(data_file) for data_file in files)\ndf1 = concat(frames)\nelapsed = time.time() - t\nprint('done reading after ... %d sec ' % elapsed)",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "11837\ndone reading after ... 0 sec \n"
      }
     ],
     "prompt_number": 53
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# store the DataFrame in an HDF5 file for later use;\n# try/finally closes the file even when the write raises\nstore = HDFStore('tmp.h5')\ntry:\n    store['df'] = df1\nfinally:\n    store.close()",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 54
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "df1.describe()",
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 56,
       "text": "                  x             y             z             u             v             w           age             t\ncount  11837.000000  11837.000000  11837.000000  11837.000000  11837.000000  11837.000000  11837.000000  11837.000000\nmean       0.007461      0.002877     -0.050775      0.008799     -0.001952      0.000680     19.658190  10020.199121\nstd        0.003269      0.002449      0.002268      0.005068      0.003025      0.005665     16.776826     16.810213\nmin       -0.000795     -0.001697     -0.055244     -0.013424     -0.012890     -0.018831      0.000000  10000.000000\n25%        0.005130      0.000832     -0.052593      0.005572     -0.004035     -0.003723      6.000000  10007.000000\n50%        0.007830      0.002683     -0.050860      0.009434     -0.002110      0.000266     15.000000  10016.000000\n75%        0.010288      0.004845     -0.049000      0.012515     -0.000022      0.004214     29.000000  10029.000000\nmax        0.013838      0.008204     -0.046061      0.020057      0.008560      0.040254     87.000000  10087.000000"
      }
     ],
     "prompt_number": 56
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "",
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
 }
	{
	"metadata": {
	"name": "trajAcc_file_to_h5"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "%config InlineBackend.figure_format = 'svg'\nfrom pandas import *  # kept: later cells call read_csv, concat and HDFStore unqualified\nimport pandas as pd\n\nimport glob\nimport os\nimport time\n\n# plt is imported explicitly: plt.figsize is not part of matplotlib.pyplot and\n# the name plt itself was only defined when IPython ran in pylab mode\nimport matplotlib.pyplot as plt\n# from mpl_toolkits.mplot3d import axes3d\nimport numpy as np\n\n# same effect as the former plt.figsize(10,6), without needing pylab mode\nplt.rcParams['figure.figsize'] = (10, 6)\npd.set_option('repr_html', False)\npd.set_option('line_width', 120)",
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 49
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "# Define the directory:\ndata_dir = 'test_data'\n# the files are named trajAcc.<frame number>\ndir_path = os.path.join(data_dir, 'trajAcc.*')\n# list the files; sorted, because glob returns them in arbitrary order\nfiles = sorted(glob.glob(dir_path))\nif not files:\n    raise IOError('no files matching %s' % dir_path)\n# read one file to see how many columns there are\ntmp = read_csv(files[0], sep='\\t')\nlines, cols = tmp.shape\n# TODO (original author's note): also support 47 columns with correct names\nif cols == 32:\n    last_columns = ['id','age']\nelif cols == 34:\n    last_columns = ['nx','ny','nz','age']\nelse:\n    # stop here instead of failing later in get_data with a NameError\n    raise ValueError('unsupported number of columns: %d (expected 32 or 34)' % cols)\n# the first 30 names are the same in both layouts\ncolumns = ['x','y','z','u','v','w','ax','ay','az',\n           'ux','uy','uz','vx','vy','vz','wx','wy','wz',\n           'ut','vt','wt',\n           'daxdx','daxdy','daxdz',\n           'daydx','daydy','daydz',\n           'dazdx','dazdy','dazdz'] + last_columns\n# we are going to use only x,y,z,u,v,w and age (age is the last column in both layouts)\ncols_to_read = [0,1,2,3,4,5,cols - 1]\n",
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 50
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "def get_data(f):\n    \"\"\"Read the selected columns of one trajAcc file and add a time column.\n\n    The text after the first dot of the file name is taken as the frame\n    number and added to ``age`` to build the time sequence ``t``.\n\n    f -- path of a tab-separated trajAcc file (trajAcc.<frame>)\n\n    Returns a DataFrame with the columns picked by the notebook-level\n    ``cols_to_read`` (named from ``columns``) plus the column ``t``.\n    \"\"\"\n    # split only the file name, so a dot in the directory part of the path\n    # (e.g. './test_data/trajAcc.10000') cannot be mistaken for the extension\n    frame = int(os.path.basename(f).split('.')[1])\n    df = read_csv(f, sep='\\t', names=columns, usecols=cols_to_read)\n    # builtin int instead of np.int: the alias was removed in NumPy 1.24\n    df['t'] = df['age'].astype(int) + frame\n    return df",
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 52
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "# Read every file through a generator expression and let pandas join the\n# per-file frames into a single DataFrame (see pandas tutorials)\nt = time.time()\nframes = (get_data(data_file) for data_file in files)\ndf1 = concat(frames)\nelapsed = time.time() - t\nprint('done reading after ... %d sec ' % elapsed)",
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "11837\ndone reading after ... 0 sec \n"
	}
	],
	"prompt_number": 53
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "# store the DataFrame in an HDF5 file for later use;\n# try/finally closes the file even when the write raises\nstore = HDFStore('tmp.h5')\ntry:\n    store['df'] = df1\nfinally:\n    store.close()",
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 54
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "df1.describe()",
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "pyout",
	"prompt_number": 56,
	"text": " x y z u v w age t\ncount 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000\nmean 0.007461 0.002877 -0.050775 0.008799 -0.001952 0.000680 19.658190 10020.199121\nstd 0.003269 0.002449 0.002268 0.005068 0.003025 0.005665 16.776826 16.810213\nmin -0.000795 -0.001697 -0.055244 -0.013424 -0.012890 -0.018831 0.000000 10000.000000\n25% 0.005130 0.000832 -0.052593 0.005572 -0.004035 -0.003723 6.000000 10007.000000\n50% 0.007830 0.002683 -0.050860 0.009434 -0.002110 0.000266 15.000000 10016.000000\n75% 0.010288 0.004845 -0.049000 0.012515 -0.000022 0.004214 29.000000 10029.000000\nmax 0.013838 0.008204 -0.046061 0.020057 0.008560 0.040254 87.000000 10087.000000"
	}
	],
	"prompt_number": 56
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": "",
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}
No results found