Created
April 14, 2018 11:46
-
-
Save rsignell-usgs/4a54ea152d4e10a14deff516bf597015 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Write National Water Model (NWM) model data to Zarr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client, progress, LocalCluster\n", | |
"import pandas as pd\n", | |
"import xarray as xr\n", | |
"import s3fs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Client</h3>\n", | |
"<ul>\n", | |
" <li><b>Scheduler: </b>tcp://127.0.0.1:38824\n", | |
" <li><b>Dashboard: </b><a href='http://127.0.0.1:42695/status' target='_blank'>http://127.0.0.1:42695/status</a>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Cluster</h3>\n", | |
"<ul>\n", | |
" <li><b>Workers: </b>2</li>\n", | |
" <li><b>Cores: </b>2</li>\n", | |
" <li><b>Memory: </b>8.37 GB</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: scheduler='tcp://127.0.0.1:38824' processes=2 cores=2>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# depends on the machine you are using\n", | |
"cluster = LocalCluster()\n", | |
"client = Client(cluster)\n", | |
"\n", | |
"client " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dates = pd.date_range(start='2018-04-07T18:00', end='2018-04-08T04:00', freq='H')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = ['{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(root,a.strftime('%Y%m%d'),a.strftime('%H')) for a in dates]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 480 ms, sys: 116 ms, total: 596 ms\n", | |
"Wall time: 8.58 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"ds = xr.open_mfdataset(urls,concat_dim='time')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds = ds.drop(['ProjectionCoordinateSystem'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)\n", | |
"Coordinates:\n", | |
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n", | |
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ...\n", | |
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...\n", | |
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n", | |
"Dimensions without coordinates: nv\n", | |
"Data variables:\n", | |
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(11, 2), chunksize=(1, 2)>\n", | |
" T2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" LWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" Q2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" U2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" V2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" PSFC (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" RAINRATE (time, y, x) float32 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" SWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
"Attributes:\n", | |
" model_initialization_time: 2018-04-07_18:00:00\n", | |
" model_output_valid_time: 2018-04-07_19:00:00\n", | |
" DODS.strlen: 0\n", | |
" DODS_EXTRA.Unlimited_Dimension: time" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fs = s3fs.S3FileSystem(anon=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"f_zarr = 'rsignell/nwm/test03'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d = s3fs.S3Map(f_zarr, s3=fs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 26.3 s, sys: 5.69 s, total: 32 s\n", | |
"Wall time: 12min\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.backends.zarr.ZarrStore at 0x7fdc40044e10>" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%time ds.to_zarr(store=d, mode='w')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Test to see if we can read what we wrote" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds2 = xr.open_zarr(d)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)\n", | |
"Coordinates:\n", | |
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n", | |
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n", | |
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ...\n", | |
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...\n", | |
"Dimensions without coordinates: nv\n", | |
"Data variables:\n", | |
" LWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" PSFC (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" Q2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" RAINRATE (time, y, x) float32 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" SWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" T2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" U2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" V2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(11, 2), chunksize=(11, 2)>\n", | |
"Attributes:\n", | |
" DODS.strlen: 0\n", | |
" DODS_EXTRA.Unlimited_Dimension: time\n", | |
" model_initialization_time: 2018-04-07_18:00:00\n", | |
" model_output_valid_time: 2018-04-07_19:00:00" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment