hmaarrfk · October 10, 2018 02:34
diff --git a/benchmarking_ndindex_nditer.ipynb b/benchmarking_ndindex_nditer.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# done on linux, conda-forge python 3.6\n",
    "import itertools\n",
    "N = (100, 100, 100)\n",
    "N_tot = int(np.prod(N))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16.5 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Baseline test, probably can't go any faster than this\n",
    "for i in range(N_tot):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10.5 ms ± 54.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# This just proves me wrong: nested loops beat the flat range above. Why does life not make sense?\n",
    "for i in range(N[0]):\n",
    "    for j in range(N[1]):\n",
    "        for k in range(N[2]):\n",
    "            pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "401 ms ± 1.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "for i in np.ndindex(N):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26.8 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Not as fast as the range over N_tot, but close enough\n",
    "for i in itertools.product(*[range(r) for r in N]):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.random.random(N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "27.6 ms ± 393 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# A pretty good baseline, but we don't get the index\n",
    "for a_value in a.flat:\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "65.9 ms ± 529 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# This isn't so bad. I would like performance closer to flat,\n",
    "# but hey, pretty good; nditer is quite complex\n",
    "for a_value in np.nditer(a):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "66.1 ms ± 485 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "it = np.nditer(a, flags=['multi_index'])\n",
    "\n",
    "# Adding the multi_index flag has no perceivable effect on speed\n",
    "for a_value in it:\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "128 ms ± 841 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "it = np.nditer(a, flags=['multi_index'])\n",
    "# Accessing multi_index slows us down by a factor of 2\n",
    "for a_value in it:\n",
    "    i = it.multi_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "164 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "it = np.nditer(a, flags=['multi_index'])\n",
    "# Adding another layer (a generator expression) is slightly slower\n",
    "for i in (it.multi_index for _ in it):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy.lib.stride_tricks import as_strided"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "136 ms ± 7.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# The slowdown doesn't seem to have anything to do with the strides\n",
    "x = as_strided(np.zeros(1), shape=a.shape, strides=np.zeros_like(a.shape))\n",
    "_it = np.nditer(x, flags=['multi_index', 'zerosize_ok'], order='C')\n",
    "for _ in _it:\n",
    "    i = _it.multi_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "161 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# The slowdown doesn't seem to have anything to do with the strides\n",
    "x = as_strided(np.zeros(1), shape=a.shape, strides=np.zeros_like(a.shape))\n",
    "_it = np.nditer(x, flags=['multi_index', 'zerosize_ok'], order='C')\n",
    "for i in (_it.multi_index for _ in _it):\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Conclusion: it seems that Python's lack of a JIT really hamstrings\n",
    "# the creation of a class for iterators.\n",
    "# How you write what is effectively the same loop drastically affects performance."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"# done on linux, conda-forge python 3.6\n",
	"import itertools\n",
	"N = (100, 100, 100)\n",
	"N_tot = int(np.prod(N))\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"16.5 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# Baseline test, probably can't go any faster than this\n",
	"for i in range(N_tot):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"10.5 ms ± 54.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# This just proves me wrong: nested loops beat the flat range above. Why does life not make sense?\n",
	"# Each loop level must be indented deeper than its parent, or the cell raises IndentationError.\n",
	"for i in range(N[0]):\n",
	"    for j in range(N[1]):\n",
	"        for k in range(N[2]):\n",
	"            pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"401 ms ± 1.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"for i in np.ndindex(N):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"26.8 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# Not as fast as the range over N_tot, but close enough\n",
	"for i in itertools.product(*[range(r) for r in N]):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"a = np.random.random(N)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"27.6 ms ± 393 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# A pretty good baseline, but we don't get the index\n",
	"for a_value in a.flat:\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"65.9 ms ± 529 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# This isn't so bad. I would like performance closer to flat,\n",
	"# but hey, pretty good; nditer is quite complex\n",
	"for a_value in np.nditer(a):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"66.1 ms ± 485 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"it = np.nditer(a, flags=['multi_index'])\n",
	"\n",
	"# Adding the multi_index flag has no perceivable effect on speed\n",
	"for a_value in it:\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"128 ms ± 841 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"it = np.nditer(a, flags=['multi_index'])\n",
	"# Accessing multi_index slows us down by a factor of 2\n",
	"for a_value in it:\n",
	" i = it.multi_index"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"164 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"it = np.nditer(a, flags=['multi_index'])\n",
	"# Adding another layer (a generator expression) is slightly slower\n",
	"for i in (it.multi_index for _ in it):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"from numpy.lib.stride_tricks import as_strided"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"136 ms ± 7.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# The slowdown doesn't seem to have anything to do with the strides\n",
	"x = as_strided(np.zeros(1), shape=a.shape, strides=np.zeros_like(a.shape))\n",
	"_it = np.nditer(x, flags=['multi_index', 'zerosize_ok'], order='C')\n",
	"for _ in _it:\n",
	" i = _it.multi_index"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"161 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# The slowdown doesn't seem to have anything to do with the strides\n",
	"x = as_strided(np.zeros(1), shape=a.shape, strides=np.zeros_like(a.shape))\n",
	"_it = np.nditer(x, flags=['multi_index', 'zerosize_ok'], order='C')\n",
	"for i in (_it.multi_index for _ in _it):\n",
	" pass"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Conclusion: it seems that Python's lack of a JIT really hamstrings\n",
	"# the creation of a class for iterators.\n",
	"# How you write what is effectively the same loop drastically affects performance."
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}