Created
February 23, 2018 18:51
-
-
Save martindurant/deb36c8fb4692df23f27a201e71d4c89 to your computer and use it in GitHub Desktop.
GCS FUSE latest
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xarray as xr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\u001b[0m\u001b[01;32mconus_ens_001.nc\u001b[0m* \u001b[01;32mconus_ens_027.nc\u001b[0m* \u001b[01;32mconus_ens_053.nc\u001b[0m* \u001b[01;32mconus_ens_079.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_002.nc\u001b[0m* \u001b[01;32mconus_ens_028.nc\u001b[0m* \u001b[01;32mconus_ens_054.nc\u001b[0m* \u001b[01;32mconus_ens_080.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_003.nc\u001b[0m* \u001b[01;32mconus_ens_029.nc\u001b[0m* \u001b[01;32mconus_ens_055.nc\u001b[0m* \u001b[01;32mconus_ens_081.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_004.nc\u001b[0m* \u001b[01;32mconus_ens_030.nc\u001b[0m* \u001b[01;32mconus_ens_056.nc\u001b[0m* \u001b[01;32mconus_ens_082.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_005.nc\u001b[0m* \u001b[01;32mconus_ens_031.nc\u001b[0m* \u001b[01;32mconus_ens_057.nc\u001b[0m* \u001b[01;32mconus_ens_083.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_006.nc\u001b[0m* \u001b[01;32mconus_ens_032.nc\u001b[0m* \u001b[01;32mconus_ens_058.nc\u001b[0m* \u001b[01;32mconus_ens_084.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_007.nc\u001b[0m* \u001b[01;32mconus_ens_033.nc\u001b[0m* \u001b[01;32mconus_ens_059.nc\u001b[0m* \u001b[01;32mconus_ens_085.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_008.nc\u001b[0m* \u001b[01;32mconus_ens_034.nc\u001b[0m* \u001b[01;32mconus_ens_060.nc\u001b[0m* \u001b[01;32mconus_ens_086.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_009.nc\u001b[0m* \u001b[01;32mconus_ens_035.nc\u001b[0m* \u001b[01;32mconus_ens_061.nc\u001b[0m* \u001b[01;32mconus_ens_087.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_010.nc\u001b[0m* \u001b[01;32mconus_ens_036.nc\u001b[0m* \u001b[01;32mconus_ens_062.nc\u001b[0m* \u001b[01;32mconus_ens_088.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_011.nc\u001b[0m* \u001b[01;32mconus_ens_037.nc\u001b[0m* \u001b[01;32mconus_ens_063.nc\u001b[0m* \u001b[01;32mconus_ens_089.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_012.nc\u001b[0m* \u001b[01;32mconus_ens_038.nc\u001b[0m* \u001b[01;32mconus_ens_064.nc\u001b[0m* \u001b[01;32mconus_ens_090.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_013.nc\u001b[0m* \u001b[01;32mconus_ens_039.nc\u001b[0m* \u001b[01;32mconus_ens_065.nc\u001b[0m* \u001b[01;32mconus_ens_091.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_014.nc\u001b[0m* \u001b[01;32mconus_ens_040.nc\u001b[0m* \u001b[01;32mconus_ens_066.nc\u001b[0m* \u001b[01;32mconus_ens_092.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_015.nc\u001b[0m* \u001b[01;32mconus_ens_041.nc\u001b[0m* \u001b[01;32mconus_ens_067.nc\u001b[0m* \u001b[01;32mconus_ens_093.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_016.nc\u001b[0m* \u001b[01;32mconus_ens_042.nc\u001b[0m* \u001b[01;32mconus_ens_068.nc\u001b[0m* \u001b[01;32mconus_ens_094.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_017.nc\u001b[0m* \u001b[01;32mconus_ens_043.nc\u001b[0m* \u001b[01;32mconus_ens_069.nc\u001b[0m* \u001b[01;32mconus_ens_095.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_018.nc\u001b[0m* \u001b[01;32mconus_ens_044.nc\u001b[0m* \u001b[01;32mconus_ens_070.nc\u001b[0m* \u001b[01;32mconus_ens_096.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_019.nc\u001b[0m* \u001b[01;32mconus_ens_045.nc\u001b[0m* \u001b[01;32mconus_ens_071.nc\u001b[0m* \u001b[01;32mconus_ens_097.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_020.nc\u001b[0m* \u001b[01;32mconus_ens_046.nc\u001b[0m* \u001b[01;32mconus_ens_072.nc\u001b[0m* \u001b[01;32mconus_ens_098.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_021.nc\u001b[0m* \u001b[01;32mconus_ens_047.nc\u001b[0m* \u001b[01;32mconus_ens_073.nc\u001b[0m* \u001b[01;32mconus_ens_099.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_022.nc\u001b[0m* \u001b[01;32mconus_ens_048.nc\u001b[0m* \u001b[01;32mconus_ens_074.nc\u001b[0m* \u001b[01;32mconus_ens_100.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_023.nc\u001b[0m* \u001b[01;32mconus_ens_049.nc\u001b[0m* \u001b[01;32mconus_ens_075.nc\u001b[0m* \u001b[01;32mconus_ens_mean.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_024.nc\u001b[0m* \u001b[01;32mconus_ens_050.nc\u001b[0m* \u001b[01;32mconus_ens_076.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_025.nc\u001b[0m* \u001b[01;32mconus_ens_051.nc\u001b[0m* \u001b[01;32mconus_ens_077.nc\u001b[0m*\r\n", | |
"\u001b[01;32mconus_ens_026.nc\u001b[0m* \u001b[01;32mconus_ens_052.nc\u001b[0m* \u001b[01;32mconus_ens_078.nc\u001b[0m*\r\n" | |
] | |
} | |
], | |
"source": [ | |
"ls gcs/newmann-met-ensemble-netcdf/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 4.62 s, sys: 156 ms, total: 4.77 s\n", | |
"Wall time: 2min 24s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"ds = xr.open_mfdataset('/home/ubuntu/gcs/newmann-met-ensemble-netcdf/conus_ens_*.nc',\n", | |
" engine='netcdf4', concat_dim='ensemble', chunks={'time': 50})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 24 ms, sys: 8 ms, total: 32 ms\n", | |
"Wall time: 559 ms\n", | |
"CPU times: user 184 ms, sys: 8 ms, total: 192 ms\n", | |
"Wall time: 197 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%time ds2 = xr.open_dataset('gcs/newmann-met-ensemble-netcdf/conus_ens_001.nc', chunks={'time': 20})\n", | |
"%time ds2 = xr.open_dataset('gcs/newmann-met-ensemble-netcdf/conus_ens_001.nc', chunks={'time': 20})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 384 ms, sys: 72 ms, total: 456 ms\n", | |
"Wall time: 1.73 s\n", | |
"CPU times: user 364 ms, sys: 64 ms, total: 428 ms\n", | |
"Wall time: 427 ms\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"83.1488" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = ds2.t_mean.data\n", | |
"%time x[500:600, :, :].compute().nbytes / 1e6\n", | |
"%time x[500:600, :, :].compute().nbytes / 1e6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 40.5 s, sys: 780 ms, total: 41.3 s\n", | |
"Wall time: 1min 48s\n", | |
"CPU times: user 40 s, sys: 728 ms, total: 40.8 s\n", | |
"Wall time: 40.5 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0.096432" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%time x[:, 0, 0].compute().nbytes / 1e6\n", | |
"%time x[:, 0, 0].compute().nbytes / 1e6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"9558.4453125" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x.nbytes / 2**20" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/ubuntu/miniconda3/lib/python3.6/site-packages/xarray/core/common.py:594: FutureWarning: pd.TimeGrouper is deprecated and will be removed; Please use pd.Grouper(freq=...)\n", | |
" label=label, base=base)\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b3920a0d46254506b01dc55e695482eb", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/html": [ | |
"<p>Failed to display Jupyter Widget of type <code>VBox</code>.</p>\n", | |
"<p>\n", | |
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", | |
" that the widgets JavaScript is still loading. If this message persists, it\n", | |
" likely means that the widgets JavaScript library is either not installed or\n", | |
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n", | |
" Widgets Documentation</a> for setup instructions.\n", | |
"</p>\n", | |
"<p>\n", | |
" If you're reading this message in another frontend (for example, a static\n", | |
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n", | |
" it may mean that your frontend doesn't currently support widgets.\n", | |
"</p>\n" | |
], | |
"text/plain": [ | |
"VBox()" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"from dask.distributed import progress, Client\n", | |
"c = Client(processes=False, threads_per_worker=2)\n", | |
"da_snow = ds['pcp'].where(ds['t_mean'] < 0.).resample(time='QS-Mar').sum('time')\n", | |
"seasonal_snow = da_snow.isel(ensemble=slice(0, 4)).groupby('time.season').mean('time').persist()\n", | |
"progress(seasonal_snow)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The computation above took over 21 minutes, which is not good.\n", | |
"\n", | |
"Note that the memory usage of the gcsfuse process skyrockets during the computation above, but at least it does eventually finish. Why are there so many `open_dataset` calls? There are only 101 files." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ubuntu 4733 0.0 0.0 4500 740 pts/5 Ss+ 18:48 0:00 /bin/sh -c ps aux | grep fuse\r\n", | |
"ubuntu 4737 0.0 0.0 12940 988 pts/5 S+ 18:48 0:00 grep fuse\r\n", | |
"ubuntu 30823 24.0 55.9 18621272 18426452 pts/0 S+ 18:21 6:32 /home/ubuntu/miniconda3/bin/python /home/ubuntu/miniconda3/bin/gcsfuse --no-threads pangeo-data /home/ubuntu/gcs\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!ps aux | grep fuse" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import gcsfs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"gcs = gcsfs.GCSFileSystem(block_size=5*2**20)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", | |
"Wall time: 57.7 µs\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"{'bucket': 'pangeo-data',\n", | |
" 'contentType': 'application/x-netcdf',\n", | |
" 'crc32c': 'XoQCpA==',\n", | |
" 'etag': 'CKbP2fHq89cCEAE=',\n", | |
" 'generation': '1512510015825830',\n", | |
" 'id': 'pangeo-data/newmann-met-ensemble-netcdf/conus_ens_001.nc/1512510015825830',\n", | |
" 'kind': 'storage#object',\n", | |
" 'md5Hash': 'izarcra7JosDSHLCEZvReg==',\n", | |
" 'mediaLink': 'https://www.googleapis.com/download/storage/v1/b/pangeo-data/o/newmann-met-ensemble-netcdf%2Fconus_ens_001.nc?generation=1512510015825830&alt=media',\n", | |
" 'metageneration': '1',\n", | |
" 'name': 'newmann-met-ensemble-netcdf/conus_ens_001.nc',\n", | |
" 'path': 'pangeo-data/newmann-met-ensemble-netcdf/conus_ens_001.nc',\n", | |
" 'selfLink': 'https://www.googleapis.com/storage/v1/b/pangeo-data/o/newmann-met-ensemble-netcdf%2Fconus_ens_001.nc',\n", | |
" 'size': 14838739901,\n", | |
" 'storageClass': 'REGIONAL',\n", | |
" 'timeCreated': '2017-12-05T21:40:15.819Z',\n", | |
" 'timeStorageClassUpdated': '2017-12-05T21:40:15.819Z',\n", | |
" 'updated': '2017-12-05T21:40:15.819Z'}" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gcs.ls('pangeo-data/newmann-met-ensemble-netcdf/')\n", | |
"%time gcs.info('pangeo-data/newmann-met-ensemble-netcdf/conus_ens_001.nc')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 532 ms, sys: 212 ms, total: 744 ms\n", | |
"Wall time: 1.03 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"with gcs.open('pangeo-data/newmann-met-ensemble-netcdf/conus_ens_001.nc', 'rb') as f:\n", | |
" f.seek(-700 * 2 ** 20, 2)\n", | |
" f.read(100 * 2**20)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.00user 0.35system 0:00.97elapsed 36%CPU (0avgtext+0avgdata 1840maxresident)k\r\n", | |
"205360inputs+0outputs (1major+76minor)pagefaults 0swaps\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!time head -c 104857600 ~/gcs/newmann-met-ensemble-netcdf/conus_ens_001.nc > /dev/null" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# 2.32 s for 100 MB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 392 ms, sys: 0 ns, total: 392 ms\n", | |
"Wall time: 2.87 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"with open('/home/ubuntu/gcs/newmann-met-ensemble-netcdf/conus_ens_001.nc', 'rb') as f:\n", | |
" f.seek(-700 * 2 ** 20, 2)\n", | |
" f.read(100 * 2**20)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 72 ms, sys: 0 ns, total: 72 ms\n", | |
"Wall time: 502 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"with open('/home/ubuntu/gcs/newmann-met-ensemble-netcdf/conus_ens_001.nc', 'rb') as f:\n", | |
" f.seek(-700 * 2 ** 20, 2)\n", | |
" f.read(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment