Skip to content

Instantly share code, notes, and snippets.

@JiaweiZhuang
Created April 25, 2018 22:24
Show Gist options
  • Save JiaweiZhuang/1a287e68b5af9eccebad8056fb556b4f to your computer and use it in GitHub Desktop.
Save JiaweiZhuang/1a287e68b5af9eccebad8056fb556b4f to your computer and use it in GitHub Desktop.
# Parallelizing linear regression (across extra dimension) with Numba and dask
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Parallelizing linear regression (across extra dimension) with Numba and dask"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import numpy.linalg as LA\n",
"from scipy import stats\n",
"\n",
"import dask.array as da\n",
"from dask.diagnostics import ProgressBar\n",
"\n",
"from numba import jit, prange"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Input data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(0)\n",
"X = np.random.rand(20000, 4000)\n",
"Y = np.random.rand(20000, 4000)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Speed-up Linear regression itself"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Built-in Scipy.stats"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinregressResult(slope=-0.0003621403862170396, intercept=0.5027361902987688, rvalue=-0.00036486335324476015, pvalue=0.9815954145065677, stderr=0.015697312038407407)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stats.linregress(X[0,:], Y[0,:])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"216 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit stats.linregress(X[0,:], Y[0,:]) # deadly slow..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hand-written numba code"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"@jit(nopython=True, nogil=True)\n",
"def my_linregress(x, y):\n",
" '''\n",
" Adapted from http://charlesfranzen.com/posts/simple-linear-regression-in-python/\n",
" '''\n",
" n = x.size\n",
" sum_x = x.sum()\n",
" sum_y = y.sum()\n",
" sum_xy = (x*y).sum()\n",
" sum_xx = (x**2).sum()\n",
" \n",
" slope = (sum_xy - (sum_x*sum_y)/n)/(sum_xx - (sum_x*sum_x)/n)\n",
" \n",
" # only compute slope for now\n",
" # intercept = sum_y/n - slope*(sum_x/n)\n",
" \n",
" return slope"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.00036214038622500987"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"my_linregress(X[0,:], Y[0,:]) # same as scipy.stats result"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"23 µs ± 453 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"%timeit my_linregress(X[0,:], Y[0,:]) # 10x faster than Scipy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loop over the entire dataset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# numba"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"@jit(nogil=True, nopython=True)\n",
"def regress_block(X, Y):\n",
" N = X.shape[0]\n",
" s = np.empty(N)\n",
" for i in range(N):\n",
" s[i] = my_linregress(X[i,:], Y[i,:])\n",
" return s"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(20000,)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Z = regress_block(X, Y)\n",
"Z.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.00036214, 0.00651474, 0.00686529, ..., -0.02137249,\n",
" -0.00529643, -0.00165035])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Z "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"440 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit regress_block(X, Y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Numba parallel"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"@jit(nogil=True, nopython=True, parallel=True)\n",
"def regress_block_parallel(X, Y):\n",
" N = X.shape[0]\n",
" s = np.empty(N)\n",
" for i in prange(N):\n",
" s[i] = my_linregress(X[i,:], Y[i,:])\n",
" return s"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array_equal(regress_block_parallel(X, Y), Z) # same as serial result"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"123 ms ± 2.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit regress_block_parallel(X, Y) # 4x faster than serial"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dask parallel"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"X_d = da.from_array(X, chunks=[2000,-1])\n",
"Y_d = da.from_array(Y, chunks=[2000,-1])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dask.array<array, shape=(20000, 4000), dtype=float64, chunksize=(2000, 4000)>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_d"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# need the drop_axis keyword, see https://github.com/dask/dask/issues/263\n",
"Z_d = da.map_blocks(regress_block, X_d, Y_d, dtype=np.ndarray, drop_axis=[1])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[########################################] | 100% Completed | 0.2s\n",
"CPU times: user 480 ms, sys: 12.2 ms, total: 492 ms\n",
"Wall time: 216 ms\n"
]
}
],
"source": [
"%%time\n",
"with ProgressBar():\n",
" Z_result = Z_d.compute(num_workers=4) # 2x faster than serial"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array_equal(Z_result, Z) # same as serial result"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[########################################] | 100% Completed | 0.5s\n",
"CPU times: user 478 ms, sys: 11.8 ms, total: 490 ms\n",
"Wall time: 524 ms\n"
]
}
],
"source": [
"%%time\n",
"with ProgressBar():\n",
" Z_result = Z_d.compute(num_workers=1) # with one worker, as fast as serial "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment