Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Created April 14, 2018 11:46
Show Gist options
  • Save rsignell-usgs/4a54ea152d4e10a14deff516bf597015 to your computer and use it in GitHub Desktop.
Save rsignell-usgs/4a54ea152d4e10a14deff516bf597015 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Write National Water Model (NWM) model data to Zarr"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import Client, progress, LocalCluster\n",
"import pandas as pd\n",
"import xarray as xr\n",
"import s3fs"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:38824\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:42695/status' target='_blank'>http://127.0.0.1:42695/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>2</li>\n",
" <li><b>Cores: </b>2</li>\n",
" <li><b>Memory: </b>8.37 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://127.0.0.1:38824' processes=2 cores=2>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# depends on the machine you are using\n",
"cluster = LocalCluster()\n",
"client = Client(cluster)\n",
"\n",
"client "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"dates = pd.date_range(start='2018-04-07T18:00', end='2018-04-08T04:00', freq='H')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"urls = ['{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(root,a.strftime('%Y%m%d'),a.strftime('%H')) for a in dates]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 480 ms, sys: 116 ms, total: 596 ms\n",
"Wall time: 8.58 s\n"
]
}
],
"source": [
"%%time \n",
"ds = xr.open_mfdataset(urls,concat_dim='time')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"ds = ds.drop(['ProjectionCoordinateSystem'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)\n",
"Coordinates:\n",
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n",
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ...\n",
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...\n",
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n",
"Dimensions without coordinates: nv\n",
"Data variables:\n",
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(11, 2), chunksize=(1, 2)>\n",
" T2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" LWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" Q2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" U2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" V2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" PSFC (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" RAINRATE (time, y, x) float32 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" SWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
"Attributes:\n",
" model_initialization_time: 2018-04-07_18:00:00\n",
" model_output_valid_time: 2018-04-07_19:00:00\n",
" DODS.strlen: 0\n",
" DODS_EXTRA.Unlimited_Dimension: time"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"fs = s3fs.S3FileSystem(anon=False)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"f_zarr = 'rsignell/nwm/test03'"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"d = s3fs.S3Map(f_zarr, s3=fs)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 26.3 s, sys: 5.69 s, total: 32 s\n",
"Wall time: 12min\n"
]
},
{
"data": {
"text/plain": [
"<xarray.backends.zarr.ZarrStore at 0x7fdc40044e10>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time ds.to_zarr(store=d, mode='w')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test to see if we can read what we wrote"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"ds2 = xr.open_zarr(d)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)\n",
"Coordinates:\n",
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n",
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n",
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ...\n",
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...\n",
"Dimensions without coordinates: nv\n",
"Data variables:\n",
" LWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" PSFC (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" Q2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" RAINRATE (time, y, x) float32 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" SWDOWN (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" T2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" U2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" V2D (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(11, 2), chunksize=(11, 2)>\n",
"Attributes:\n",
" DODS.strlen: 0\n",
" DODS_EXTRA.Unlimited_Dimension: time\n",
" model_initialization_time: 2018-04-07_18:00:00\n",
" model_output_valid_time: 2018-04-07_19:00:00"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment