JiaweiZhuang · April 25, 2018 22:24
diff --git a/parallel_linregress.ipynb b/parallel_linregress.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parallelizing linear regression (across extra dimension) with Numba and dask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import numpy.linalg as LA\n",
    "from scipy import stats\n",
    "\n",
    "import dask.array as da\n",
    "from dask.diagnostics import ProgressBar\n",
    "\n",
    "from numba import jit, prange"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Input data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)\n",
    "X = np.random.rand(20000, 4000)\n",
    "Y = np.random.rand(20000, 4000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Speed-up Linear regression itself"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Built-in Scipy.stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinregressResult(slope=-0.0003621403862170396, intercept=0.5027361902987688, rvalue=-0.00036486335324476015, pvalue=0.9815954145065677, stderr=0.015697312038407407)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.linregress(X[0,:], Y[0,:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "216 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit stats.linregress(X[0,:], Y[0,:]) # deadly slow..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hand-written numba code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "@jit(nopython=True, nogil=True)\n",
    "def my_linregress(x, y):\n",
    "    '''\n",
    "    Adapted from http://charlesfranzen.com/posts/simple-linear-regression-in-python/\n",
    "    '''\n",
    "    n = x.size\n",
    "    sum_x = x.sum()\n",
    "    sum_y = y.sum()\n",
    "    sum_xy = (x*y).sum()\n",
    "    sum_xx = (x**2).sum()\n",
    "    \n",
    "    slope = (sum_xy - (sum_x*sum_y)/n)/(sum_xx - (sum_x*sum_x)/n)\n",
    "    \n",
    "    # only compute slope for now\n",
    "    # intercept = sum_y/n - slope*(sum_x/n)\n",
    "    \n",
    "    return slope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.00036214038622500987"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_linregress(X[0,:], Y[0,:]) # same as scipy.stats result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "23 µs ± 453 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit my_linregress(X[0,:], Y[0,:]) # 10x faster than Scipy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loop over the entire dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# numba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "@jit(nogil=True, nopython=True)\n",
    "def regress_block(X, Y):\n",
    "    N = X.shape[0]\n",
    "    s = np.empty(N)\n",
    "    for i in range(N):\n",
    "        s[i] = my_linregress(X[i,:], Y[i,:])\n",
    "    return s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20000,)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Z = regress_block(X, Y)\n",
    "Z.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.00036214,  0.00651474,  0.00686529, ..., -0.02137249,\n",
       "       -0.00529643, -0.00165035])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Z "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "440 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%timeit regress_block(X, Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Numba parallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "@jit(nogil=True, nopython=True, parallel=True)\n",
    "def regress_block_parallel(X, Y):\n",
    "    N = X.shape[0]\n",
    "    s = np.empty(N)\n",
    "    for i in prange(N):\n",
    "        s[i] = my_linregress(X[i,:], Y[i,:])\n",
    "    return s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array_equal(regress_block_parallel(X, Y), Z) # same as serial result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "123 ms ± 2.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit regress_block_parallel(X, Y) # 4x faster than serial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dask parallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_d = da.from_array(X, chunks=[2000,-1])\n",
    "Y_d = da.from_array(Y, chunks=[2000,-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dask.array<array, shape=(20000, 4000), dtype=float64, chunksize=(2000, 4000)>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# need the drop_axis keyword, see https://github.com/dask/dask/issues/263\n",
    "Z_d = da.map_blocks(regress_block, X_d, Y_d, dtype=np.ndarray, drop_axis=[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[########################################] | 100% Completed |  0.2s\n",
      "CPU times: user 480 ms, sys: 12.2 ms, total: 492 ms\n",
      "Wall time: 216 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "with ProgressBar():\n",
    "    Z_result = Z_d.compute(num_workers=4) # 2x faster than serial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array_equal(Z_result, Z) # same as serial result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[########################################] | 100% Completed |  0.5s\n",
      "CPU times: user 478 ms, sys: 11.8 ms, total: 490 ms\n",
      "Wall time: 524 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "with ProgressBar():\n",
    "    Z_result = Z_d.compute(num_workers=1) # with one worker, as fast as serial "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Parallelizing linear regression (across extra dimension) with Numba and dask"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import numpy.linalg as LA\n",
	"from scipy import stats\n",
	"\n",
	"import dask.array as da\n",
	"from dask.diagnostics import ProgressBar\n",
	"\n",
	"from numba import jit, prange"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Input data"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"np.random.seed(0)\n",
	"X = np.random.rand(20000, 4000)\n",
	"Y = np.random.rand(20000, 4000)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Speed-up Linear regression itself"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Built-in Scipy.stats"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"LinregressResult(slope=-0.0003621403862170396, intercept=0.5027361902987688, rvalue=-0.00036486335324476015, pvalue=0.9815954145065677, stderr=0.015697312038407407)"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"stats.linregress(X[0,:], Y[0,:])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"216 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit stats.linregress(X[0,:], Y[0,:]) # deadly slow..."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Hand-written numba code"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"@jit(nopython=True, nogil=True)\n",
	"def my_linregress(x, y):\n",
	" '''\n",
	" Adapted from http://charlesfranzen.com/posts/simple-linear-regression-in-python/\n",
	" '''\n",
	" n = x.size\n",
	" sum_x = x.sum()\n",
	" sum_y = y.sum()\n",
	" sum_xy = (x*y).sum()\n",
	" sum_xx = (x**2).sum()\n",
	" \n",
	" slope = (sum_xy - (sum_xsum_y)/n)/(sum_xx - (sum_xsum_x)/n)\n",
	" \n",
	" # only compute slope for now\n",
	" # intercept = sum_y/n - slope*(sum_x/n)\n",
	" \n",
	" return slope"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"-0.00036214038622500987"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"my_linregress(X[0,:], Y[0,:]) # same as scipy.stats result"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"23 µs ± 453 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit my_linregress(X[0,:], Y[0,:]) # 10x faster than Scipy"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Loop over the entire dataset"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# numba"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"@jit(nogil=True, nopython=True)\n",
	"def regress_block(X, Y):\n",
	" N = X.shape[0]\n",
	" s = np.empty(N)\n",
	" for i in range(N):\n",
	" s[i] = my_linregress(X[i,:], Y[i,:])\n",
	" return s"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(20000,)"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"Z = regress_block(X, Y)\n",
	"Z.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([-0.00036214, 0.00651474, 0.00686529, ..., -0.02137249,\n",
	" -0.00529643, -0.00165035])"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"Z "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"440 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%timeit regress_block(X, Y)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Numba parallel"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"@jit(nogil=True, nopython=True, parallel=True)\n",
	"def regress_block_parallel(X, Y):\n",
	" N = X.shape[0]\n",
	" s = np.empty(N)\n",
	" for i in prange(N):\n",
	" s[i] = my_linregress(X[i,:], Y[i,:])\n",
	" return s"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.array_equal(regress_block_parallel(X, Y), Z) # same as serial result"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"123 ms ± 2.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit regress_block_parallel(X, Y) # 4x faster than serial"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Dask parallel"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"X_d = da.from_array(X, chunks=[2000,-1])\n",
	"Y_d = da.from_array(Y, chunks=[2000,-1])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dask.array<array, shape=(20000, 4000), dtype=float64, chunksize=(2000, 4000)>"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"X_d"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"# need the drop_axis keyword, see https://github.com/dask/dask/issues/263\n",
	"Z_d = da.map_blocks(regress_block, X_d, Y_d, dtype=np.ndarray, drop_axis=[1])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[########################################] \| 100% Completed \| 0.2s\n",
	"CPU times: user 480 ms, sys: 12.2 ms, total: 492 ms\n",
	"Wall time: 216 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"with ProgressBar():\n",
	" Z_result = Z_d.compute(num_workers=4) # 2x faster than serial"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.array_equal(Z_result, Z) # same as serial result"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[########################################] \| 100% Completed \| 0.5s\n",
	"CPU times: user 478 ms, sys: 11.8 ms, total: 490 ms\n",
	"Wall time: 524 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"with ProgressBar():\n",
	" Z_result = Z_d.compute(num_workers=1) # with one worker, as fast as serial "
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}