Skip to content

Instantly share code, notes, and snippets.

@kandersolar
Created August 27, 2021 21:29
Show Gist options
  • Select an option

  • Save kandersolar/09c320d08ef8daac80f3302e4b11b1ac to your computer and use it in GitHub Desktop.

Select an option

Save kandersolar/09c320d08ef8daac80f3302e4b11b1ac to your computer and use it in GitHub Desktop.
timing comparisons of reading hdf5 and netcdf4 data with various python libraries
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "70648ff5-92a9-47e5-bf80-152bfc58a570",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tables 3.6.1\n",
"h5py 3.2.1\n",
"netCDF4 1.5.7\n",
"xarray 0.19.0\n",
"scipy 1.7.1\n"
]
}
],
"source": [
"import tables\n",
"# import netCDF4 before h5py: https://stackoverflow.com/a/62690211\n",
"import netCDF4\n",
"import h5py\n",
"import xarray as xr\n",
"import scipy\n",
"\n",
"for pkg in [tables, h5py, netCDF4, xr, scipy]:\n",
" print(pkg.__name__, pkg.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "27120641-4e5b-43e7-909b-96e8a0c2d072",
"metadata": {},
"outputs": [],
"source": [
"def tables_h5(h5file, x, y):\n",
" with tables.open_file(h5file) as h5data:\n",
" return h5data.root.LinkeTurbidity[x, y]\n",
"\n",
"def h5py_h5(h5file, x, y):\n",
" with h5py.File(h5file, 'r') as h5data:\n",
" return h5data['LinkeTurbidity'][x, y]\n",
"\n",
"def netcdf4_h5(h5file, x, y):\n",
" ncf = netCDF4.Dataset(h5file)\n",
" x = ncf['LinkeTurbidity'][x, y].filled()\n",
" ncf.close()\n",
" return x\n",
"\n",
"def xarray_h5(h5file, x, y):\n",
" ds = xr.open_dataset(h5file)\n",
" return ds['LinkeTurbidity'][x, y].values\n",
"\n",
"test_funcs = [tables_h5, h5py_h5, netcdf4_h5, xarray_h5]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bdde534f-56c0-4654-9619-7af4ed1788b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tables_h5 \t [38 38 38 39 40 41 42 42 42 39 39 38] <class 'numpy.ndarray'>\n",
"h5py_h5 \t [38 38 38 39 40 41 42 42 42 39 39 38] <class 'numpy.ndarray'>\n",
"netcdf4_h5 \t [38 38 38 39 40 41 42 42 42 39 39 38] <class 'numpy.ndarray'>\n",
"xarray_h5 \t [38 38 38 39 40 41 42 42 42 39 39 38] <class 'numpy.ndarray'>\n"
]
}
],
"source": [
"# test they all do the same thing:\n",
"h5file = 'LinkeTurbidities.h5'\n",
"\n",
"for test_func in test_funcs:\n",
" result = test_func(h5file, 100, 100)\n",
" print(test_func.__name__, '\\t', result, type(result))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6c2e63fc-aaa4-402c-ba3c-6b691b7dc8db",
"metadata": {},
"outputs": [],
"source": [
"# built-in timing magics were giving me inconsistent results\n",
"from collections import defaultdict\n",
"import time\n",
"import pandas as pd\n",
"import random\n",
"\n",
"timings = defaultdict(list)\n",
"for i in range(200):\n",
" x = random.randint(0, 2159)\n",
" y = random.randint(0, 4319)\n",
" for test_func in test_funcs:\n",
" # time.perf_counter() has higher resolution than time.time() on windows\n",
" # https://docs.python.org/3.5/library/time.html#time.perf_counter\n",
" st = time.perf_counter()\n",
" _ = test_func(h5file, x, y)\n",
" ed = time.perf_counter()\n",
" timings[test_func.__name__].append(ed - st)\n",
"\n",
"timings = pd.DataFrame(timings)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1fe71265-4f39-447e-ac05-c326e8e52f53",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'hdf5 file read times')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEJCAYAAACDscAcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAneUlEQVR4nO3de5xdZX3v8c+XCQEMCAqYQgIkhaiTpALHCLXm1Iw5QMRLqCWaiReQadMKRKxHKTgWlXY4wNFi22CO0YncJ1wsNmAEhMxoo3IJBiXJkDomIBPwxiUlkUBm+J0/1jNhZzOzs/ew9+y9k+/79dqvWetZz7PWbz8zs397rWddFBGYmZkVa69qB2BmZvXFicPMzErixGFmZiVx4jAzs5I4cZiZWUmcOMzMrCROHFaTJD0q6X+VUD8kHZOm95N0m6TNkm4exrbfIekXkrZIOk3S9ySdkZadKWllqescCcPos7WSZlQuIttdjap2AGYVcDowFjg4IvrSh+MK4A85dc6JiKuHaH8xsDAi/iXNf6dCcY4YSVcBvRHx+YGyiJhSvYisnjlx2O7oKOC/IqIvp+yJiBhfQvu15Q/rZZIEKCJequR2zCrBh6qslh0n6efpkNONkvYdWCDps5KelPSEpLNyyr8EXAR8KB1qaillg5J+CfwxcFtqv4+kLkl/NUT9N0v6vqSnJa2X9MEC6+6S1CbpR2R7P39cqL2k90haLem/JT0u6Yt56/uopMckPSWptcB25wMfBs5P7+m2VL7j0JakL0q6WdJ1kp6T9LCkN0q6UNJv0/ZPzlnngZLa0+9gk6R/ktSQlh0j6Qfp9/Z7STfusuOtrjhxWC37IDALmAi8BTgTQNIs4DPAScAkYMdx/Yj4AnAJcGNE7B8R7WnRGyT9RtJGSVdIGjPYBiPiaOBXwPtS+xeGCi6t4/vADcAbgLnA1yRNLvCePgrMBw4AfreL9luBjwEHAe8BPiHptLTtycCitL7DgYOBQfeoImIxcD1weXpP7xsitvcB1wKvA1YDd5J9RowjO3z39Zy6VwF9wDHA8cDJwEBy/UfgrrSe8cC/FegPq0NOHFbL/jUinoiIp4HbgONS+QeBb0XEmojYCnxxF+t5JLU9DHgX8Fbgn8sQ33uBRyPiWxHRFxGrgW8Dcwq0uSoi1qbDaLMKtY+Iroh4OCJeioifAx3AO9N6Tgduj4gfpuT2D8CrPez1nxFxZ4rtZuBQ4NKI2A4sBSZIOkjSWOBU4FMRsTUifgtcQZb4ALaTHe47PCK2RURNnkxgw+fEYbXs1znTfwD2T9OHA4/nLHus0Eoi4tcRsS59AG8Ezgf+sgzxHQWcKOnZgRfZIaE/KtAmN+6C7SWdKKlT0u8kbQb+Fjgktd2pD1ICfepVvp/f5Ew/D/w+Ivpz5iH7HRwF7A08mRP318n2miDrXwH3pzO3zsJ2Kx4ct3r0JHBEzvyRJbYPyvOl6XHgBxFxUonbLrb9DcBC4N0RsU3SV3k5cTwJNA5UlPQassNVxWz31XoceAE4JO8EhGxDEb8G/jrFNR24W9IPI6KnjDFYFXmPw+rRTcCZkianD8wvFKosqUnSUcocAVwK/EcZ4rgdeGMapN47vd4mqXGXLYtrfwDwdEoaJwDzctreArxX0nRJo8nGIAr9P/+GbND/VYuIJ8nGML4i6bWS9pJ0tKR3AkiaI2lgvOUZsqTls8d2I04cVnci4nvAV8muzehJPws5Hvgx2WDzj4GHgU+WIY7nyAaF5wJPkB1auwzYp0ztzwYulvQc2ZliN+W0XQucQ7ZX8iTZB3Rvgc21A5PToaXvFPcOC/oYMBpYl7Z9C9kYEsDbgPskbQGWAedFxIYybNNqhPwgJzMzK4X3OMzMrCROHGZmVhInDjMzK4kTh5mZlWSPuI7jkEMOiQkTJlQ7jIK2bt3KmDGD3gXDhsH9WV7uz/Kqh/588MEHfx8Rhw62bI9IHBMmTGDVqlXVDqOgrq
4uZsyYUe0wdhvuz/Jyf5ZXPfSnpCHvyOBDVWZmVhInDjMzK4kTh5mZlcSJw8zMSuLEYWY2Qjo6Opg6dSozZ85k6tSpdHR0VDukYdkjzqoyM6u2jo4OWltbaW9vp7+/n4aGBlpasicbNzc3Vzm60niPw8xsBLS1tdHe3k5TUxOjRo2iqamJ9vZ22traqh1ayZw4zMxGQHd3N9OnT9+pbPr06XR3d1cpouFz4jAzGwGNjY2sXLnz49dXrlxJY2Oxz/2qHU4cZmYjoLW1lZaWFjo7O+nr66Ozs5OWlhZaW1urHVrJPDhuZjYCBgbAFyxYQHd3N42NjbS1tdXdwDg4cZiZjZjm5maam5vr4l5VhfhQlZmZlcSJw8zMSuLEYWZmJXHiMDMbIb7liJmZFc23HDEzs5L4liNmZlYS33LEzMxK4luOmJlZSXzLETMzK4lvOWJmZiXzLUfMzGyP5MRhZmYlqWjikDRL0npJPZIuGGT5PpJuTMvvkzQhZ9mFqXy9pFNyyg+SdIukRyR1S3p7Jd+DmZntrGKJQ1IDcCXwbmAy0Cxpcl61FuCZiDgGuAK4LLWdDMwFpgCzgK+l9QH8C3BHRLwZOBaov5OgzczqWCX3OE4AeiJiQ0S8CCwFZufVmQ1cnaZvAWZKUipfGhEvRMRGoAc4QdKBwJ8D7QAR8WJEPFvB92BmZnkqmTjGAY/nzPemskHrREQfsBk4uEDbicDvgG9JWi3pm5LGVCZ8MzMbTL2djjsK+B/Agoi4T9K/ABcA/5BfUdJ8YD7A2LFj6erqGsk4S7Zly5aaj7GeuD/Ly/1ZXvXen5VMHJuAI3Lmx6eywer0ShoFHAg8VaBtL9AbEfel8lvIEscrRMRiYDHAtGnTotbPma7387prjfuzvNyf5VXv/VnJQ1UPAJMkTZQ0mmywe1lenWXAGWn6dGBFREQqn5vOupoITALuj4hfA49LelNqMxNYV8H3YGZmeSq2xxERfZLOBe4EGoAlEbFW0sXAqohYRjbIfa2kHuBpsuRCqncTWVLoA86JiP606gXA9SkZbQA+Xqn3YGZmr1TRMY6IWA4szyu7KGd6GzBniLZtwCtuVB8RDwHTyhqomZkVzVeOm5lZSZw4zMysJE4cZmZWEicOMzMriROHmZmVxInDzMxK4sRhZmYlceIwM7OSOHGYmVlJnDjMzKwkThxmZlYSJw4zMyuJE4eZmZXEicPMzErixGFmNkI6OjqYOnUqM2fOZOrUqXR0dFQ7pGGpt2eOm5nVpY6ODlpbW2lvb6e/v5+GhgZaWloAaG5urnJ0pfEeh5nZCGhra6O9vZ2mpiZGjRpFU1MT7e3ttLW94nl1Nc+Jw8xsBHR3d9Pb27vToare3l66u7urHVrJfKjKzGwEHH744Zx//vnccMMNOw5VzZs3j8MPP7zaoZXMicPMbIRs27aNs846i8cee4yjjjqKbdu2sf/++1c7rJL5UJWZ2QjYtGkTo0Zl39UlATBq1Cg2bdpUzbCGxYnDzGwEjB49mgsvvJCNGzdyzz33sHHjRi688EJGjx5d7dBK5kNVZmYj4MUXX2ThwoUcf/zx9Pf309nZycKFC3nxxRerHVrJKrrHIWmWpPWSeiRdMMjyfSTdmJbfJ2lCzrILU/l6SafklD8q6WFJD0laVcn4zczKZfLkycybN48FCxZwyimnsGDBAubNm8fkyZOrHVrJKrbHIakBuBI4CegFHpC0LCLW5VRrAZ6JiGMkzQUuAz4kaTIwF5gCHA7cLemNEdGf2jVFxO8rFbuZ2XAMjF0MZe3atTtND8wP1S4iyhdcGVVyj+MEoCciNkTEi8BSYHZendnA1Wn6FmCmsh6cDSyNiBciYiPQk9ZnZlazIqLg64YbbmDKlCmgvZgyZQo33HBDwfq1qpJjHOOAx3Pme4ETh6oTEX2SNgMHp/J789qOS9MB3CUpgK9HxOLBNi5pPjAfYOzYsXR1db2qN1NpW7ZsqfkY64n7s7zcn+Vx2G
GHsXDhQs68YysLZ40BqMt+rcfB8ekRsUnSG4DvS3okIn6YXykllMUA06ZNixkzZoxwmKXp6uqi1mOsJ+7P8nJ/ltkd363r/qzkoapNwBE58+NT2aB1JI0CDgSeKtQ2IgZ+/ha4FR/CMjMbUZVMHA8AkyRNlDSabLB7WV6dZcAZafp0YEVkB/aWAXPTWVcTgUnA/ZLGSDoAQNIY4GRgTQXfg5mZ5anYoao0ZnEucCfQACyJiLWSLgZWRcQyoB24VlIP8DRZciHVuwlYB/QB50REv6SxwK3pDIRRwA0RcUel3oOZmb1SRcc4ImI5sDyv7KKc6W3AnCHatgFteWUbgGPLH6mZmRVryMQh6fVFtH8pIp4tXzhmZlbrCu1xPJFeha5oaQCOLGtEZmZW0wolju6IOL5QY0mryxyPmZnVuEJnVb29iPbF1DEzs93IkIkjDVwj6WhJ+6TpGZI+Kemg3DpmZrbnKOY6jm8D/ZKOIbsS+wjghopGZWZmNauYxPFSRPQBfwH8W0R8FjissmGZmVmtKiZxbJfUTHaF9+2pbO/KhWRmZrWsmMTxcbJB8LaI2JhuAXJtZcMyM7NatcvEERHrIuKTEdGR5jdGxGWVD23P0NHRwdSpU5k5cyZTp06lo6Oj2iGZmRVU6MrxxRExv1DjYurY0Do6OmhtbaW9vZ3+/n4aGhpoaWkBoLm5ucrRmZkNrtAFgKdJKnS6rYCmMsezR2lra6O9vZ2mpqYdzztob29nwYIFThxmVrMKJY7PFtH+P8sVyJ6ou7ub6dOn71Q2ffp0uru7qxSRmdmuDZk4IuLqoZZZeTQ2NrJy5Uqaml7ecVu5ciWNjY1VjMrMrLBKPsjJdqG1tZWWlhY6Ozvp6+ujs7OTlpYWWltbqx2amdmQ6vGZ47uNgXGMBQsW0N3dTWNjI21tbR7fMLOa5sRRZc3NzTQ3N+8YHDczq3WFTse9DYihlkfE+ysSkZmZ1bRCexxfTj8/APwRcF2abwZ+U8mgzMysdhU6q+oHAJK+EhHTchbdJmlVxSMzM7OaVMwYxxhJfxwRGwDSvarGVDYsM7PacuyX7mLz89vLtr4JF3z3Va/jwP325mdfOLkM0ZSmmMTxd0CXpA1kV4sfBfxNRaMyM6sxm5/fzqOXvqcs6yrXyTDlSD7DUcxNDu8AJgHnAZ8E3hQRdxazckmzJK2X1CPpgkGW7yPpxrT8PkkTcpZdmMrXSzolr12DpNWSbs9fp5mZVVaxp+NOAt4E7AscK4mIuKZQA0kNwJXASUAv8ICkZRGxLqdaC/BMRBwjaS5wGfAhSZOBucAU4HDgbklvjIj+1O48oBt4bZHxm5lZmexyj0PSF4B/S68m4HKgmFNxTwB6ImJDRLwILAVm59WZDQzc2uQWYKYkpfKlEfFCRGwEetL6kDQeeA/wzSJiMDOzMitmj+N04FhgdUR8XNJYXj41t5BxwOM5873AiUPViYg+SZuBg1P5vXltx6XprwLnAwcU2rik+cB8gLFjx9LV1VVEyNWzZcuWmo+xnrg/y8v9mSlXH5SzP6vxeykmcTwfES9J6pP0WuC3wBEVjmtQkt4L/DYiHpQ0o1DdiFgMLAaYNm1a1PpV2b5yvLzcn+XR0dFBW1vbjlvitLa27rm3xLnju2X7myrb32cZYypFMTc5XCXpIOAbwIPAT4GfFNFuEzsnmPGpbNA6kkYBBwJPFWj7DuD9kh4lO/T1LknF7P2YWYk6Ojo477zz2Lp1KxHB1q1bOe+88/yUSivqrKqzI+LZiPh/ZAPdZ0TEx4tY9wPAJEkTJY0mG+xelldnGXBGmj4dWBERkcrnprOuJpINzt8fERdGxPiImJDWtyIiPlJELGZWovPPP5+GhgaWLFnCXXfdxZIlS2hoaOD888+vdmhWZcUMjkvSRyRdFBGPAs9KOmFX7SKiDzgXuJPsDKibImKtpIslDQyutwMHS+oBPg1ckNquBW4C1gF3AOfknFFlZiOgt7eXa665hq
amJkaNGkVTUxPXXHMNvb291Q7NqqyYMY6vAS8B7wIuBp4Dvg28bVcNI2I5sDyv7KKc6W3AnCHatgFtBdbdBXTtKgYzMyuvYsY4ToyIc4BtABHxDDC6olGZWdWNHz+eM844Y6cHjZ1xxhmMHz++2qFZlRWzx7E9XcwXAJIOJdsDMbPd2OWXX855553HWWedxa9+9SuOPPJI+vr6+MpXvlLt0KrigMYL+JOrX3EDjOErw8O5D2iE7LK2kVVM4vhX4FbgDZLayAaxP1/RqMys6gZOu21ry44YjxkzhksuuWSPPR33ue5Lfa+qpGDikLQXsJHsgruZZDc5PC0iukcgNjOrMj+h0gZTMHGkC/+ujIjjgUdGKCYzM6thxQyO3yPpL9M9pKzMOjo6mDp1KjNnzmTq1Km+uMrMal4xYxx/Q3aNRZ+kbWSHqyIifGfaV6mjo4PW1lba29vp7++noaGBlpYWgD32OPKr5VtkmFXeLhNHRBS8maANX1tbG+3t7TQ1Ne04htze3s6CBQv8YTcMTsRmI6PY53FYBXR3d3PJJZcwc+ZMIgJJzJw5k+5un3swHG1tbcybN48FCxbs2OOYN28ebW1tThxmZeTEUUX77bcfd999N5/4xCc49dRTWb58OYsWLWLMGD/SfTjWrVvH1q1bWbJkyY49jrPOOovHHnus2qHVLR/621lZT3+9ozzPHK8GJ44q2rp1KwcccABz5syhv7+fOXPmcN111/Hcc89VO7S6NHr0aBYsWLDTob8FCxbwuc99rtqh1SUf+ttZua7hgCwBlXN9Iy4ihnwBDcAjherUw+utb31r1CIgFi9eHFOmTIm99torpkyZEosXL47s12KlkhSHHnpoTJgwISTFhAkT4tBDDw1J1Q6tLk2ZMiVWrFgRERGdnZ0REbFixYqYMmVKFaPaPRz197dXO4RdAlbFEJ+pBU/HjeyOtOslHVnxDLYHksTq1atZs2YN99xzD2vWrGH16tX4zOfhGTduHNu3bwfY0Yfbt29n3LhxhZrZELq7u5k+ffpOZdOnT/cYnBV1qOp1wFpJ9wNbBwojopjnjlsBJ510EosWLQLg1FNP5eyzz2bRokWcfPLJVY6sfu277747jXHMmzev2iHVrcbGRlauXElTU9OOspUrV9LY2FjFqKwWFJM4/qHiUezmdrUHsWjRoh0JBOCuu+4q2Cbbi7R8TzzxBFddddVOZ1VdfvnlnHnmmdUOrS61trbS0tKyY4yjs7OTlpaWHfeusj1XMddx/EDSUcCkiLhb0mvIxj6sSMV80Nf9YFkNaGxsZPz48axZs2bH4HhnZ6e/IQ/TwAB4biL2qc0GxT0B8K+BW4Cvp6JxwHcqGJPZsAx8Q859fkRLSwutra3VDq1uNTc37zQG56RhUNyhqnOAE4D7ACLiF5LeUNGozIbB35BLV+4TMXwYdc9QzE0OX4iIFwdmJI0iPdTJrNb4G3JphjrdMv911N/fXlQ92zMUkzh+IOlzwH6STgJuBm6rbFhmZlarijlUdQHQAjxMdqfc5cA3KxmUWSE+vGJWXcWcVfWSpKvJxjgCWB/+T7Mq8llqZtVVzFlV7wF+Sfbs8YVAj6R3F7NySbMkrZfUI+kVT3mXtI+kG9Py+yRNyFl2YSpfL+mUVLavpPsl/UzSWklfKvJ9mplVnKSiXo9d9t6i6tWqYsY4vgI0RcSMiHgn0ARcsatGkhqAK4F3A5OBZkmT86q1AM9ExDFpnZeltpOBucAUYBbwtbS+F4B3RcSxwHHALEl/WsR7MDOruGJPNujs7Kzrkw2KSRzPRURPzvwGoJjbt54A9ETEhnRW1lJgdl6d2cDVafoWYGZ6RO1sYGlEvBARG4Ee4IR0760tqf7e6VW7vWtmthsqZnB8laTlwE1kH9JzgAckfQAgIv59iHbjgMdz5nuBE4eqExF9kjYDB6fye/PajoMdezIPAscAV0bEfYNtXNJ8YD7A2LFj6erqKuKtVlc9xFhP3J/l5f
4sny1bttR1fxaTOPYFfgO8M83/DtgPeB9ZIhkqcVREumPvcZIOAm6VNDUi1gxSbzGwGGDatGkxY8aMkQyzdHd8l5qPsZ64P8vL/VlWA7fEqVfFnFX18WGuexNwRM78+FQ2WJ3edGHhgcBTxbSNiGcldZKNgbwicZiZWWUUM8YxXA8AkyRNlDSabLB7WV6dZcAZafp0YEU61XcZMDeddTURmATcL+nQtKeBpP2Ak4BHKvgezMwsT8UeHZvGLM4F7iS7m+6SiFgr6WKyJ0stA9qBayX1AE+TJRdSvZuAdUAfcE5E9Es6DLg6jXPsBdwUEbdX6j2YmdkrVfSZ4xGxnOxK89yyi3Kmt5ENtg/Wtg1oyyv7OXB8+SM1M7NiFXMB4FhJ7ZK+l+YnS2qpfGhmZlaLihnjuIrscNPhaf6/gE9VKB4zM6txxSSOQyLiJuAlyMYugP6KRmVmZjWrmMSxVdLBpCu00y0+Nlc0KjMzq1nFDI5/muz02KMl/Qg4lOzUWTMz2wMVcwHgTyW9E3gTILLbqm+veGRmZlaTdpk40jUTpwITUv2TJRER/1zh2MzMrAYVc6jqNmAb2RMAX6psOPXn2C/dxebny7MDNuGC75ZlPQfutzc/+8LJZVmXmVm+YhLH+Ih4S8UjqVObn99elifNlfOmZ+VKQGZmgykmcXxP0skRcVfFozGzsinn3jB4j9heVkziuJfs9uV7AdvJBsgjIl5b0cjM7FUp194weI/YdlZM4vhn4O3Aw1HLzzI0M7MRUcwFgI8Da5w0zMwMitvj2AB0pZscvjBQ6NNxzcz2TMUkjo3pNTq9zCrCg7lm9aGYK8e/NBKBmHkw16w+DJk4JC2MiHMl3Ua6wWGuiHh/RSOrEwc0XsCfXH1BeVZ2dXlWc0AjQHk+gM3M8hXa4/gYcC7w5RGKpS49132pLwA0sz1KocTxS4CI+MEIxWJmZVTWvWHwHrHtUChxHCrp00Mt9FlVZrWtXHvD4D1i21mhxNEA7E92pbiZmRlQOHE8GREXj1gkZmZWFwpdOf6q9zQkzZK0XlKPpFccbJW0j6Qb0/L7JE3IWXZhKl8v6ZRUdoSkTknrJK2VdN6rjdHMzEpTaI9j5qtZcXoA1JXASUAv8ICkZRGxLqdaC/BMRBwjaS5wGfAhSZOBucAU4HDgbklvBPqA/52eSngA8KCk7+etc8SV7ZjtHeW7YK0eeTDXrD4MmTgi4ulXue4TgJ6I2AAgaSkwG8j9kJ8NfDFN3wIslKRUvjQiXgA2SuoBToiInwBPpviek9QNjMtb54gq1+DjhAu+W7Z11SsP5prVh2JuOTJc48hukDigFzhxqDoR0SdpM3BwKr83r+243IbpsNbxwH1ljdpsN1LWpLeH7xHbyyqZOCpG0v7At4FPRcR/D1FnPjAfYOzYsXR1dY1cgMNUDzFWWrn6YMuWLWXtz3r83Vw1a0zZ1nXmHVvLur567M9yKvff50irZOLYBByRMz8+lQ1Wp1fSKOBA4KlCbSXtTZY0ro+Ifx9q4xGxGFgMMG3atCjXYYuKueO7ZTu0UrfK2AflPFTl3w3ugzIr699nFRTzPI7hegCYJGmipNFkg93L8uosA85I06cDK9JzP5YBc9NZVxOBScD9afyjHej2BYhmZtVRsT2ONGZxLnAn2cWESyJiraSLgVURsYwsCVybBr+fJksupHo3kQ169wHnRES/pOnAR4GHJT2UNvW5iFheqfdhZmY7q+gYR/pAX55XdlHO9DZgzhBt24C2vLKV+Ep2M7OqqsvBcdt9+Swgs9rnxGE1o5zXsfi6GLPKceIYAdmYfhH1Litufdn5A2Zm1VHJs6osiYhdvjo7O4uq56RhZtXmPQ6zPVixe8NQ3B6xv9jsGbzHYbYHK3Yvt9g9YtszOHGYmVlJnDjMzKwkThxmZlYSD45b3fHpzWbV5T0Oqzs+vdmsupw4zMysJE4cZmZWEicOMzMriROHmZmVxInDzMxK4sRhZmYlce
IwM7OSOHGYmVlJnDjMzKwkThxmZlYSJw4zMytJRROHpFmS1kvqkXTBIMv3kXRjWn6fpAk5yy5M5eslnZJTvkTSbyWtqWTsZmY2uIolDkkNwJXAu4HJQLOkyXnVWoBnIuIY4ArgstR2MjAXmALMAr6W1gdwVSozM7MqqOQexwlAT0RsiIgXgaXA7Lw6s4Gr0/QtwExl98yeDSyNiBciYiPQk9ZHRPwQeLqCcZuZWQGVTBzjgMdz5ntT2aB1IqIP2AwcXGRbMzOrgt32QU6S5gPzAcaOHUtXV1d1A9qFLVu21HyM9cT9WV7uz/Kq9/6sZOLYBByRMz8+lQ1Wp1fSKOBA4Kki2xYUEYuBxQDTpk2LGTNmlNJ8xHV1dVHrMdYT92d5uT/Lq977s5KHqh4AJkmaKGk02WD3srw6y4Az0vTpwIrIHsm2DJibzrqaCEwC7q9grGZmVqSKJY40ZnEucCfQDdwUEWslXSzp/alaO3CwpB7g08AFqe1a4CZgHXAHcE5E9ANI6gB+ArxJUq+klkq9BzMze6WKjnFExHJgeV7ZRTnT24A5Q7RtA9oGKW8uc5hmZlYCXzluZmYlceIwM7OSOHGYmVlJnDjMzKwkThxmZlYSJw4zMyuJE4eZmZXEicPMzErixGFmZiVx4jAzs5I4cZiZWUmcOMzMrCROHGZmVhInDjMzK4kTh5mZlcSJw8zMSuLEYWZmJXHiMDOzkjhxmJlZSZw4zMysJE4cZmZWEicOMzMriROHmZmVpKKJQ9IsSesl9Ui6YJDl+0i6MS2/T9KEnGUXpvL1kk4pdp1mZlZZFUsckhqAK4F3A5OBZkmT86q1AM9ExDHAFcBlqe1kYC4wBZgFfE1SQ5HrNDOzCqrkHscJQE9EbIiIF4GlwOy8OrOBq9P0LcBMSUrlSyPihYjYCPSk9RWzTjMzq6BRFVz3OODxnPle4MSh6kREn6TNwMGp/N68tuPS9K7WCYCk+cD8NLtF0vphvIeRdAjw+2oHsRtxf5aX+7O86qE/jxpqQSUTR1VFxGJgcbXjKJakVRExrdpx7C7cn+Xl/iyveu/PSh6q2gQckTM/PpUNWkfSKOBA4KkCbYtZp5mZVVAlE8cDwCRJEyWNJhvsXpZXZxlwRpo+HVgREZHK56azriYCk4D7i1ynmZlVUMUOVaUxi3OBO4EGYElErJV0MbAqIpYB7cC1knqAp8kSAaneTcA6oA84JyL6AQZbZ6Xewwirm8NqdcL9WV7uz/Kq6/5U9gXfzMysOL5y3MzMSuLEYWZmJXHiMDOzkjhxlEDSQZLO3kWdCZLWDLGsS1JZzt2WdJWk04dY1i/pofSqi7POhuo3SV+UtCnn/Zxaxm3OkHT7EMuukrQxZ7vHlWu71SLptFJv0ZP7dybpf0pam/pjv1T2Wkm9khbuYj1bhig/U9Lvcvr5r0qJb09Ra/23214AWCEHAWcDX6tyHLvyfEQcV+0gyuiKiPhyFbb72Yi4pQrbrZTTgNvJzlYcjg8D/ycirssp+0fgh68yrhsj4txXuY6qS7dLUkS8NNh8BY14/3mPozSXAkenzH6FpHsk/VTSw5Jy75k1StL1krol3SLpNfkrknSypJ+k9jdL2j+VXyppnaSfS9rVh+WfS/qxpA1D7X3UmQZJ30jfau8a+FY7mPRN6z/SXtwvJH0hlV8s6VM59doknVdgm/un39Ej6Xem8r2dykp7ad35fSbpaEl3SHpQ0n9KerOkPwPeD/zf9Pd7tKRjJN0t6Wfp7/BoZRYquwP13cAb0rb+Cvgg8I+Srk9lbwXGAncVGW9b2ta9ksZWpFPKQNLb0v/fvpLGpL6dOtj/e/odrJd0DbAG+J9580dIWiRpVVrPl1K7d0n6Ts42T5J06y7iqp3+iwi/inwBE4A1aXoU8No0fQjZjRiV6gTwjrRsCfCZNN0FTEv1fwiMSeV/D1xEdp+u9bx8mvRBBWK5CriZLPlPJrv548CyPmAV2f2+Tqt2v5XQt3
3AcWn+JuAjwBeBR4Gfp758XVp+JvBk6rP9yP5Jp6X1/DTV2Qv4JXDwENucAWwmuwPBXsBPgOk5/bs+bfcKYJ9q91EJfXYPMCmVnUh2Ye3Aezo9p/19wF+k6X2B1wAfAL5Pdp3U4cCzA21y26f+6kp9dyawcBexBvC+NH058Pm83+PPyW50ekS1+zXF9U/Al8nuxn0hhf/fXwL+NOd3smM+lb0+/WxIffaW1PYR4NC07IaB/qmH/vMex/AJuETSz4G7yW7COPAt4PGI+FGavg6Yntf2T8k+7H8k6SGyq+ePIvsQ2wa0S/oA8IddxPCdiHgpItblbBvgqMjugzMP+Kqko4fzBqtgY0Q8lKYfJPsnXAQcDRxH9g/ylZz634+IpyLieeDfyT70HwWeknQ8cDKwOiKeKrDN+yOiN7LDCQ+lbUL2YfFm4G3A68mSey0arM/+DLg5/W19HTgsv5GkA4BxEXErQERsi4g/AH8OdEREf0Q8AawYYrtnA8sjorfIOF8kO0yWGyfAbcCEiHgLWcK6+pVNq+Ji4CSyLyOXU/j//bGIyL0pa/78ByX9FFhN9qiIyZF96l8LfETSQcDbge8ViKem+s9jHMP3YeBQ4K0RsV3So2Tf2iD7dpArf15kH3rN+SuVdAIwk+wWLOcC7yoQwwt568w2FrEp/dwgqQs4nuybd63LfT/9wH4R8ZuBAknf4OV/Hhi6n79J9k3sj8j2UkrZ5iiAiHhyYLmkbwGfKSL+asiPfyzwbFR+jOvtZIdlzgb2B0ZL2hIRQz1cbXv6sByIc6Cfc5P6N8k+pGvBwWTva2+y/+s5DP3/vjWv7Y55ZbdM+gzwtoh4RtJVOe2+RfbBvw24OSL6CsRTU/3nPY7SPAcckKYPBH6b/oia2PkWxEdKenuangeszFvPvcA7JB0DkI6jvjGNcxwYEcuBvwOOLTVASa+TtE+aPgR4B8MfDK06Sbnflv+C7JDUgJMkvT6NhZwGDOzl3Ur2ALC3kd2eZtjbTWMep+Vtt5b9N7BR0hzI4pc08He04+83Ip4DeiWdlurto2ws7ofAh5Q9OO0woGmwjUTEhyPiyIiYQPbBeE2BpDGkvN/v+4HuUtdRIV8H/gG4nuwBc4X+3wt5LVki2ZzGJd49sCDt0T0BfJ4siZSsWv3nPY4SRMRTkn6k7LTRB4A3S3qYbDzhkZyq64FzJC0h+9BelLee30k6E+gY+JAn++N5DvgPSfuS7UF8ehhhNgJfl/QS2ReDS9OhrHp1ubJTYYNsrONvcpbdD3yb7Dj7dRGxCiAiXpTUSfbNu3+Y271e0qFkv4eHgL8d5nqq4cPAIkmfJ/vGvBT4Wfr5DUmfJNuj/SjZ38rFwHayb9W3ku3lrgN+RTbuU0mflPR+srGap8n2FKtK0sfIvuHfoOypoz8mS6hzh/h/H1JE/EzS6lT/cV7+cjPgerJxjuF+4Fel/3yvKqtLKfFOi0FOQ5S0F/BTYE5E/GKkYzMrlrLrX1ZHRHu1YymFD1XZbkXZBW49wD1OGlbLJD1IdobVdbuqW2u8x1HjJLWSHULIdXNEtFUjnnok6U/IzmDJ9UJEDPrYYRs+SfcB++QVfzQiHq5GPPWmXvrPicPMzEriQ1VmZlYSJw4zMyuJE4eZmZXEicPMzEry/wG15iLqXbEijgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"timings.boxplot()\n",
"plt.ylabel('Time per read [s]')\n",
"plt.ylim(bottom=0)\n",
"plt.title('hdf5 file read times')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3ec608b4-7ed0-41f1-82a4-df1d42444330",
"metadata": {},
"outputs": [],
"source": [
"# re-save the h5 data to nc\n",
"ds = xr.open_dataset(h5file)\n",
"netcdf_file = 'test.nc'\n",
"ds.to_netcdf(netcdf_file)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9743bfdb-e818-4a27-a663-e39599ecc406",
"metadata": {},
"outputs": [],
"source": [
"def netcdf4_nc(ncfile, x, y):\n",
" ncf = netCDF4.Dataset(ncfile)\n",
" x = ncf['LinkeTurbidity'][x, y].filled()\n",
" ncf.close()\n",
" return x\n",
"\n",
"def xarray_nc(ncfile, x, y):\n",
" ds = xr.open_dataset(ncfile)\n",
" return ds['LinkeTurbidity'][x, y].values\n",
"\n",
"test_funcs = [netcdf4_nc, xarray_nc]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e4c203b7-d0c0-464d-aa33-27b37c948b37",
"metadata": {},
"outputs": [],
"source": [
"timings = defaultdict(list)\n",
"for i in range(200):\n",
" x = random.randint(0, 2159)\n",
" y = random.randint(0, 4319)\n",
" for test_func in test_funcs:\n",
" # time.perf_counter() has higher resolution than time.time() on windows\n",
" # https://docs.python.org/3.5/library/time.html#time.perf_counter\n",
" st = time.perf_counter()\n",
" _ = test_func(netcdf_file, x, y)\n",
" ed = time.perf_counter()\n",
" timings[test_func.__name__].append(ed - st)\n",
"\n",
"timings = pd.DataFrame(timings)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "60446a57-5296-4732-9620-d2129ad41728",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'netcdf file read times')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"timings.boxplot()\n",
"plt.ylabel('Time per read [s]')\n",
"plt.ylim(bottom=0)\n",
"plt.title('netcdf file read times')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c11869ed-8c18-49ec-80b2-e0c5d7998573",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15641028, 15637705)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"os.path.getsize(netcdf_file), os.path.getsize(h5file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@AdamRJensen
Copy link

Do we use tables for anything else? xarray and netCDF4 seem preferable, as they have other uses.

@mikofski
Copy link

Thanks for this. OK, I didn't realize that the format of LinkeTurbidities.h5 was already sane. I can read it using tables, as in the existing code, or just as easily using h5py. I always just assumed that we were using tables because of pandas, but now I realize that pandas isn't used at all.

import pvlib
import pathlib
import tables
import h5py

# Locate the Linke turbidity HDF5 file inside the installed pvlib package.
# pvlib.__file__ is the path of the package's __init__.py, so take .parent to
# get the package directory before appending 'data'.
# note: the file is sanely organized, not created using pandas
# note: pandas isn't used at all, no need for tables
pvlib_path = pathlib.Path(pvlib.__file__).parent
tl_file = pvlib_path / 'data' / 'LinkeTurbidities.h5'

# using tables, looks the same as h5py
# The context manager closes the file on exit, even if a read raises.
# NOTE(review): str() is used because older PyTables releases may not accept
# pathlib.Path objects -- confirm against the minimum supported version.
with tables.open_file(str(tl_file)) as tl_tables:
    # only difference is that tables uses dot notation
    tl_tables.root.LinkeTurbidity[10, 5, :]
    # array([38, 38, 38, 38, 40, 41, 42, 42, 40, 39, 38, 38], dtype=uint8)

    # slicing copies the data into memory, so it stays usable after the file
    # is closed
    alldata = tl_tables.root.LinkeTurbidity[:, :, :]  # it's a numpy array

alldata.shape
# (2160, 4320, 12)

alldata.dtype
# dtype('uint8')

# use h5py, from the hdf5 library maintainers
# Open read-only explicitly, and let the context manager close the file.
with h5py.File(tl_file, 'r') as tl_h5:
    # main difference is that it uses keys (or paths) as indices instead of
    # dot notation, like the numpy structured array API
    # note root, "/", is assumed, unless using a path as a key
    tl_h5['LinkeTurbidity'][10, 5, :]
    # array([38, 38, 38, 38, 40, 41, 42, 42, 40, 39, 38, 38], dtype=uint8)

So it is essentially easy to just remove tables and use h5py instead, no problem.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment