Skip to content

Instantly share code, notes, and snippets.

@taldcroft
Created July 22, 2014 18:33
Show Gist options
  • Select an option

  • Save taldcroft/4ac6e43fad88810c6561 to your computer and use it in GitHub Desktop.

Select an option

Save taldcroft/4ac6e43fad88810c6561 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:13c94d3975f6148268a787f116a6121a73a9d4cd60adc41a5494ed7fa6016c76"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from astropy.io import ascii\n",
"import pandas\n",
"import numpy as np\n",
"from astropy.table import Table\n",
"import cStringIO as StringIO"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 500,000-row table with 6 float columns"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dat = np.random.uniform(size=500000)\n",
"t = Table([dat,dat,dat,dat,dat,dat])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write out to a string buffer with full precision"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fh1 = StringIO.StringIO()\n",
"t.write(fh1, format='ascii')\n",
"fh1.getvalue()[:100]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"'col0 col1 col2 col3 col4 col5\\n0.9921951949754209 0.9921951949754209 0.9921951949754209 0.99219519497'"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write to a string buffer with 5 decimal places of precision (`%.5f`)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fh2 = StringIO.StringIO()\n",
"for col in t.columns.values():\n",
" col.format = '%.5f'\n",
"t.write(fh2, format='ascii')\n",
"fh2.getvalue()[:100]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"'col0 col1 col2 col3 col4 col5\\n0.99220 0.99220 0.99220 0.99220 0.99220 0.99220\\n0.65896 0.65896 0.6589'"
]
}
],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Timings for 6-column full precision table"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); ascii.read(fh1, format='basic', guess=False, use_fast_reader=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 6.94 s, sys: 327 ms, total: 7.27 s\n",
"Wall time: 7.26 s\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); ascii.read(fh1, format='basic', guess=False, use_fast_reader=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 3.08 s, sys: 286 ms, total: 3.36 s\n",
"Wall time: 3.39 s\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); pandas.read_csv(fh1, sep=' ', header=0)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 708 ms, sys: 100 ms, total: 808 ms\n",
"Wall time: 823 ms\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Timings for 6-column narrow-field table"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); ascii.read(fh2, format='basic', guess=False, use_fast_reader=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 5.75 s, sys: 283 ms, total: 6.03 s\n",
"Wall time: 6.06 s\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); ascii.read(fh2, format='basic', guess=False, use_fast_reader=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 825 ms, sys: 141 ms, total: 966 ms\n",
"Wall time: 982 ms\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); pandas.read_csv(fh2, sep=' ', header=0)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 382 ms, sys: 80.9 ms, total: 463 ms\n",
"Wall time: 463 ms\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Make 3-column tables"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fh1 = StringIO.StringIO()\n",
"fh2 = StringIO.StringIO()\n",
"t = Table([dat,dat,dat])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t.write(fh1, format='ascii')\n",
"for col in t.columns.values():\n",
" col.format = '%.5f'\n",
"t.write(fh2, format='ascii')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Timings for 3-column full precision table"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); ascii.read(fh1, format='basic', guess=False, use_fast_reader=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 4.01 s, sys: 216 ms, total: 4.22 s\n",
"Wall time: 4.28 s\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); ascii.read(fh1, format='basic', guess=False, use_fast_reader=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 1.61 s, sys: 140 ms, total: 1.75 s\n",
"Wall time: 1.77 s\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh1.seek(0); pandas.read_csv(fh1, sep=' ', header=0)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 368 ms, sys: 68.2 ms, total: 437 ms\n",
"Wall time: 437 ms\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Timings for 3-column narrow-field table"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); ascii.read(fh2, format='basic', guess=False, use_fast_reader=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 3.22 s, sys: 192 ms, total: 3.41 s\n",
"Wall time: 3.41 s\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); ascii.read(fh2, format='basic', guess=False, use_fast_reader=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 405 ms, sys: 56.5 ms, total: 462 ms\n",
"Wall time: 461 ms\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time fh2.seek(0); pandas.read_csv(fh2, sep=' ', header=0)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 226 ms, sys: 53.1 ms, total: 279 ms\n",
"Wall time: 288 ms\n"
]
}
],
"prompt_number": 18
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment