Skip to content

Instantly share code, notes, and snippets.

@seibert
Created January 19, 2015 21:16
Show Gist options
  • Save seibert/6957baddc067140e55fe to your computer and use it in GitHub Desktop.
Save seibert/6957baddc067140e55fe to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:8af50e38cd3e711ccd8a1e5995e3f6d3149c104d3a2cb9ee46e7fa8196f8ce2d"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"os.environ['NUMBA_ENABLE_AVX'] = '1'\n",
"import numpy as np\n",
"import numba\n",
"import math"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def find_alignment(x):\n",
"    data = x.ctypes.data\n",
"    \n",
"    bit = 0\n",
"    while data != 0:\n",
"        if data & 1:\n",
"            break\n",
"        else:\n",
"            data >>= 1\n",
"            bit += 1\n",
"\n",
"    return 2**bit\n",
"\n",
"def get_empty(shape, dtype, align16=False):\n",
"    old = []\n",
"    for i in range(100):\n",
"        x = np.empty(shape, dtype)\n",
"        x_alignment = find_alignment(x)\n",
"        if (align16 and x_alignment == 16) or (not align16 and x_alignment > 16):\n",
"            return x\n",
"        else:\n",
"            old.append(x) # To avoid getting this allocation again on the next loop\n",
"    else:\n",
"        raise ValueError('Unable to achieve desired align16=%s' % align16)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_align = get_empty(10000, np.float32, align16=False)\n",
"a_align[:] = np.arange(10000).astype(np.float32)\n",
"b_align = get_empty(10000, np.float32, align16=False)\n",
"b_align[:] = a_align\n",
"out_align = get_empty(10000, np.float32, align16=False)\n",
"\n",
"print map(find_alignment, (a_align, b_align, out_align))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[32, 64, 32]\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_nonalign = get_empty(10000, np.float32, align16=True)\n",
"a_nonalign[:] = np.arange(10000).astype(np.float32)\n",
"b_nonalign = get_empty(10000, np.float32, align16=True)\n",
"b_nonalign[:] = a_nonalign\n",
"out_nonalign = get_empty(10000, np.float32, align16=True)\n",
"\n",
"print map(find_alignment, (a_nonalign, b_nonalign, out_nonalign))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[16, 16, 16]\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@numba.jit\n",
"def do_math(a, b, out):\n",
"    for i in range(a.shape[0]):\n",
"        out[i] = a[i] + b[i] * math.fabs(a[i])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"do_math(a_align, b_align, out_align)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit do_math(a_nonalign, b_nonalign, out_nonalign)\n",
"%timeit do_math(a_align, b_align, out_align)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100000 loops, best of 3: 3.59 \u00b5s per loop\n",
"100000 loops, best of 3: 2.46 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment