johnsloper · August 15, 2018 13:13 · Brainor · Jan 24, 2024 · smoosbau · Sep 2, 2024
diff --git a/rolling.ipynb b/rolling.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:25.891183Z",
     "start_time": "2018-08-15T13:07:23.739938Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
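  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook shows how to compute rolling-window means and sums faster than `pandas.Series.rolling` by using NumPy (stride tricks, `cumsum`, and `convolve`)."
   ]
  },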
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:25.901159Z",
     "start_time": "2018-08-15T13:07:25.893178Z"
    }
   },
   "outputs": [],
   "source": [
    "s = pd.Series(range(10**6))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:26.046768Z",
     "start_time": "2018-08-15T13:07:25.904148Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0              NaN\n",
       "1              0.5\n",
       "2              1.5\n",
       "3              2.5\n",
       "4              3.5\n",
       "5              4.5\n",
       "6              5.5\n",
       "7              6.5\n",
       "8              7.5\n",
       "9              8.5\n",
       "10             9.5\n",
       "11            10.5\n",
       "12            11.5\n",
       "13            12.5\n",
       "14            13.5\n",
       "15            14.5\n",
       "16            15.5\n",
       "17            16.5\n",
       "18            17.5\n",
       "19            18.5\n",
       "20            19.5\n",
       "21            20.5\n",
       "22            21.5\n",
       "23            22.5\n",
       "24            23.5\n",
       "25            24.5\n",
       "26            25.5\n",
       "27            26.5\n",
       "28            27.5\n",
       "29            28.5\n",
       "            ...   \n",
       "999970    999969.5\n",
       "999971    999970.5\n",
       "999972    999971.5\n",
       "999973    999972.5\n",
       "999974    999973.5\n",
       "999975    999974.5\n",
       "999976    999975.5\n",
       "999977    999976.5\n",
       "999978    999977.5\n",
       "999979    999978.5\n",
       "999980    999979.5\n",
       "999981    999980.5\n",
       "999982    999981.5\n",
       "999983    999982.5\n",
       "999984    999983.5\n",
       "999985    999984.5\n",
       "999986    999985.5\n",
       "999987    999986.5\n",
       "999988    999987.5\n",
       "999989    999988.5\n",
       "999990    999989.5\n",
       "999991    999990.5\n",
       "999992    999991.5\n",
       "999993    999992.5\n",
       "999994    999993.5\n",
       "999995    999994.5\n",
       "999996    999995.5\n",
       "999997    999996.5\n",
       "999998    999997.5\n",
       "999999    999998.5\n",
       "Length: 1000000, dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.rolling(window=2).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:26.055744Z",
     "start_time": "2018-08-15T13:07:26.049760Z"
    }
   },
   "outputs": [],
   "source": [
    "def rolling_window(a, window):\n",
    "    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)\n",
    "    strides = a.strides + (a.strides[-1],)\n",
    "    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:26.107613Z",
     "start_time": "2018-08-15T13:07:26.057739Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5.000000e-01, 1.500000e+00, 2.500000e+00, ..., 9.999965e+05,\n",
       "       9.999975e+05, 9.999985e+05])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(rolling_window(s, 2), axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T12:59:15.696610Z",
     "start_time": "2018-08-15T12:59:15.692604Z"
    }
   },
   "source": [
    "## Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:33.393120Z",
     "start_time": "2018-08-15T13:07:26.109611Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "63.7 ms ± 4.32 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "25.6 ms ± 1.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(10, size=10**6))\n",
    "%timeit s.rolling(window=2).mean()\n",
    "%timeit np.mean(rolling_window(s, 2), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:50.905278Z",
     "start_time": "2018-08-15T13:07:33.395112Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "58.5 ms ± 1.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "15.4 ms ± 147 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(10, size=10**6))\n",
    "%timeit s.rolling(window=2).sum()\n",
    "%timeit np.sum(rolling_window(s, 2), axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specializing a bit"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using cumsum is a faster way of calculating means:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:07:50.912277Z",
     "start_time": "2018-08-15T13:07:50.907276Z"
    }
   },
   "outputs": [],
   "source": [
    "def moving_average(a, N) :\n",
    "    cumsum = np.cumsum(np.insert(a, 0, 0)) \n",
    "    return (cumsum[N:] - cumsum[:-N]) / float(N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:08:11.026482Z",
     "start_time": "2018-08-15T13:07:50.919241Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "64.6 ms ± 3.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "27.2 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "15.7 ms ± 1.11 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(10, size=10**6))\n",
    "%timeit s.rolling(window=2).mean()\n",
    "%timeit np.mean(rolling_window(s, 2), axis=1)\n",
    "%timeit moving_average(s.values, 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Taking advantage of `np.convolve` is even faster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:08:38.486719Z",
     "start_time": "2018-08-15T13:08:11.028453Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "56.8 ms ± 1.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "24.6 ms ± 390 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
      "14.7 ms ± 634 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
      "11.2 ms ± 1.42 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(10, size=10**6))\n",
    "%timeit s.rolling(window=2).mean()\n",
    "%timeit np.mean(rolling_window(s, 2), axis=1)\n",
    "%timeit moving_average(s.values, 2)\n",
    "%timeit np.convolve(s.values, np.ones((2,))/2, mode='valid')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, let's look at the performance for smaller arrays."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-15T13:10:51.979527Z",
     "start_time": "2018-08-15T13:10:34.692373Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "242 µs ± 5.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
      "58 µs ± 1.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
      "33.9 µs ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
      "9.67 µs ± 464 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(10, size=10**3))\n",
    "%timeit s.rolling(window=2).mean()\n",
    "%timeit np.mean(rolling_window(s, 2), axis=1)\n",
    "%timeit moving_average(s.values, 2)\n",
    "%timeit np.convolve(s.values, np.ones((2,))/2, mode='valid')"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Edit Metadata",
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:25.891183Z",
	"start_time": "2018-08-15T13:07:23.739938Z"
	}
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
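	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This notebook shows how to compute rolling-window means and sums faster than `pandas.Series.rolling` by using NumPy (stride tricks, `cumsum`, and `convolve`)."
	]
	},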
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:25.901159Z",
	"start_time": "2018-08-15T13:07:25.893178Z"
	}
	},
	"outputs": [],
	"source": [
	"s = pd.Series(range(10**6))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:26.046768Z",
	"start_time": "2018-08-15T13:07:25.904148Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 NaN\n",
	"1 0.5\n",
	"2 1.5\n",
	"3 2.5\n",
	"4 3.5\n",
	"5 4.5\n",
	"6 5.5\n",
	"7 6.5\n",
	"8 7.5\n",
	"9 8.5\n",
	"10 9.5\n",
	"11 10.5\n",
	"12 11.5\n",
	"13 12.5\n",
	"14 13.5\n",
	"15 14.5\n",
	"16 15.5\n",
	"17 16.5\n",
	"18 17.5\n",
	"19 18.5\n",
	"20 19.5\n",
	"21 20.5\n",
	"22 21.5\n",
	"23 22.5\n",
	"24 23.5\n",
	"25 24.5\n",
	"26 25.5\n",
	"27 26.5\n",
	"28 27.5\n",
	"29 28.5\n",
	" ... \n",
	"999970 999969.5\n",
	"999971 999970.5\n",
	"999972 999971.5\n",
	"999973 999972.5\n",
	"999974 999973.5\n",
	"999975 999974.5\n",
	"999976 999975.5\n",
	"999977 999976.5\n",
	"999978 999977.5\n",
	"999979 999978.5\n",
	"999980 999979.5\n",
	"999981 999980.5\n",
	"999982 999981.5\n",
	"999983 999982.5\n",
	"999984 999983.5\n",
	"999985 999984.5\n",
	"999986 999985.5\n",
	"999987 999986.5\n",
	"999988 999987.5\n",
	"999989 999988.5\n",
	"999990 999989.5\n",
	"999991 999990.5\n",
	"999992 999991.5\n",
	"999993 999992.5\n",
	"999994 999993.5\n",
	"999995 999994.5\n",
	"999996 999995.5\n",
	"999997 999996.5\n",
	"999998 999997.5\n",
	"999999 999998.5\n",
	"Length: 1000000, dtype: float64"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"s.rolling(window=2).mean()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:26.055744Z",
	"start_time": "2018-08-15T13:07:26.049760Z"
	}
	},
	"outputs": [],
	"source": [
	"def rolling_window(a, window):\n",
	" shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)\n",
	" strides = a.strides + (a.strides[-1],)\n",
	" return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:26.107613Z",
	"start_time": "2018-08-15T13:07:26.057739Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([5.000000e-01, 1.500000e+00, 2.500000e+00, ..., 9.999965e+05,\n",
	" 9.999975e+05, 9.999985e+05])"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.mean(rolling_window(s, 2), axis=1)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T12:59:15.696610Z",
	"start_time": "2018-08-15T12:59:15.692604Z"
	}
	},
	"source": [
	"## Performance"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:33.393120Z",
	"start_time": "2018-08-15T13:07:26.109611Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"63.7 ms ± 4.32 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"25.6 ms ± 1.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"s = pd.Series(np.random.randint(10, size=10**6))\n",
	"%timeit s.rolling(window=2).mean()\n",
	"%timeit np.mean(rolling_window(s, 2), axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:50.905278Z",
	"start_time": "2018-08-15T13:07:33.395112Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"58.5 ms ± 1.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"15.4 ms ± 147 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"s = pd.Series(np.random.randint(10, size=10**6))\n",
	"%timeit s.rolling(window=2).sum()\n",
	"%timeit np.sum(rolling_window(s, 2), axis=1)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Specializing a bit"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Using cumsum is a faster way of calculating means:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:07:50.912277Z",
	"start_time": "2018-08-15T13:07:50.907276Z"
	}
	},
	"outputs": [],
	"source": [
	"def moving_average(a, N) :\n",
	" cumsum = np.cumsum(np.insert(a, 0, 0)) \n",
	" return (cumsum[N:] - cumsum[:-N]) / float(N)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:08:11.026482Z",
	"start_time": "2018-08-15T13:07:50.919241Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"64.6 ms ± 3.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"27.2 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"15.7 ms ± 1.11 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"s = pd.Series(np.random.randint(10, size=10**6))\n",
	"%timeit s.rolling(window=2).mean()\n",
	"%timeit np.mean(rolling_window(s, 2), axis=1)\n",
	"%timeit moving_average(s.values, 2)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Taking advantage of `np.convolve` is even faster."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:08:38.486719Z",
	"start_time": "2018-08-15T13:08:11.028453Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"56.8 ms ± 1.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"24.6 ms ± 390 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
	"14.7 ms ± 634 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
	"11.2 ms ± 1.42 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"s = pd.Series(np.random.randint(10, size=10**6))\n",
	"%timeit s.rolling(window=2).mean()\n",
	"%timeit np.mean(rolling_window(s, 2), axis=1)\n",
	"%timeit moving_average(s.values, 2)\n",
	"%timeit np.convolve(s.values, np.ones((2,))/2, mode='valid')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Finally, let's look at the performance for smaller arrays."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-08-15T13:10:51.979527Z",
	"start_time": "2018-08-15T13:10:34.692373Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"242 µs ± 5.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
	"58 µs ± 1.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
	"33.9 µs ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
	"9.67 µs ± 464 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
	]
	}
	],
	"source": [
	"s = pd.Series(np.random.randint(10, size=10**3))\n",
	"%timeit s.rolling(window=2).mean()\n",
	"%timeit np.mean(rolling_window(s, 2), axis=1)\n",
	"%timeit moving_average(s.values, 2)\n",
	"%timeit np.convolve(s.values, np.ones((2,))/2, mode='valid')"
	]
	}
	],
	"metadata": {
	"celltoolbar": "Edit Metadata",
	"hide_input": false,
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	},
	"toc": {
	"base_numbering": 1,
	"nav_menu": {},
	"number_sections": true,
	"sideBar": true,
	"skip_h1_title": false,
	"title_cell": "Table of Contents",
	"title_sidebar": "Contents",
	"toc_cell": false,
	"toc_position": {},
	"toc_section_display": true,
	"toc_window_display": false
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}