Skip to content

Instantly share code, notes, and snippets.

@arbennett
Created October 2, 2024 17:51
Show Gist options
  • Save arbennett/22419d69920037e1623bf02e8814bde6 to your computer and use it in GitHub Desktop.
Save arbennett/22419d69920037e1623bf02e8814bde6 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import dask\n",
"import fsspec\n",
"import tempfile\n",
"import xarray as xr\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"\n",
"from tqdm.autonotebook import tqdm\n",
"from dask.distributed import Client, LocalCluster\n",
"from dask.diagnostics import ProgressBar\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start a local Dask cluster (four workers, one thread each) and attach a client.\n",
"cluster = LocalCluster(\n",
"    n_workers=4,\n",
"    threads_per_worker=1,\n",
"    dashboard_address=4321,\n",
")\n",
"client = Client(cluster)\n",
"# Leaving the client as the last expression makes the notebook display it\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def assemble_date_uris(\n",
"    fs_s3: fsspec.AbstractFileSystem,\n",
"    bucket: str,\n",
"    year: int,\n",
"    ens_member: str,\n",
"    period: str,\n",
"    varname: str\n",
"):\n",
"    \"\"\"Build one GRIB2 URI per date directory found for ``year``.\n",
"\n",
"    The date directories are discovered by globbing under\n",
"    ``{bucket}/GEFSv12/reforecast/{year}`` on ``fs_s3``; only the final path\n",
"    component of each match is used as the date stamp.\n",
"\n",
"    Returns:\n",
"        A list of URI strings, in the order the glob returned the dates.\n",
"    \"\"\"\n",
"    year_root = f'{bucket}/GEFSv12/reforecast/{year}'\n",
"    uris = []\n",
"    for match in fs_s3.glob(f'{year_root}/??????????/'):\n",
"        # Keep only the last path component of the match as the date stamp\n",
"        stamp = Path(match).name\n",
"        uris.append(\n",
"            f'{year_root}/{stamp}/{ens_member}/{period}/{varname}_{stamp}_{ens_member}.grib2'\n",
"        )\n",
"    return uris\n",
"\n",
"\n",
"@dask.delayed\n",
"def get_ds_from_uri(uri, selectors, iselectors):\n",
"    \"\"\"Download one GRIB2 file, subset it, and return the subset in memory.\n",
"\n",
"    Because of ``@dask.delayed``, calling this returns a lazy task; the work\n",
"    below runs when the task is computed.\n",
"\n",
"    Args:\n",
"        uri: Location of the GRIB2 file, opened with anonymous S3 access.\n",
"        selectors: Keyword indexers forwarded to ``Dataset.sel``.\n",
"        iselectors: Keyword indexers forwarded to ``Dataset.isel``.\n",
"\n",
"    Returns:\n",
"        The selected subset, already loaded so it does not depend on the\n",
"        temporary local copy of the file.\n",
"    \"\"\"\n",
"    filename = uri.split('/')[-1]\n",
"    # The context manager removes the scratch directory and everything in it,\n",
"    # even if the download or the decode raises, instead of relying on the\n",
"    # TemporaryDirectory object's finalizer.\n",
"    with tempfile.TemporaryDirectory() as tmpdir:\n",
"        local_path = f'{tmpdir}/{filename}'\n",
"        with fsspec.open(uri, s3={'anon': True}) as remote:\n",
"            with open(local_path, 'wb') as local:\n",
"                local.write(remote.read())\n",
"        # Load inside the block so the data is in memory before the dataset\n",
"        # is closed and the directory is deleted.\n",
"        with xr.open_dataset(local_path, engine='cfgrib') as ds:\n",
"            return ds.sel(**selectors).isel(**iselectors).load()\n",
"\n",
"\n",
"def get_gefs_data(\n",
"    fs_s3,\n",
"    bucket,\n",
"    year,\n",
"    ens_member,\n",
"    period,\n",
"    varname,\n",
"    selectors,\n",
"    iselectors,\n",
"):\n",
"    \"\"\"Fetch every file listed by ``assemble_date_uris`` and join them on ``time``.\n",
"\n",
"    One delayed ``get_ds_from_uri`` task is created per URI, all tasks are\n",
"    computed in a single ``dask.compute`` call, and the resulting datasets are\n",
"    concatenated along the ``time`` dimension.\n",
"    \"\"\"\n",
"    tasks = []\n",
"    for uri in assemble_date_uris(fs_s3, bucket, year, ens_member, period, varname):\n",
"        tasks.append(get_ds_from_uri(uri, selectors, iselectors))\n",
"    # compute() returns a tuple with one entry per argument; the single list\n",
"    # argument comes back as its only element.\n",
"    (loaded,) = dask.compute(tasks)\n",
"    return xr.concat(loaded, dim='time')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# The GEFS data is read from the following public S3 bucket:\n",
"# https://noaa-gefs-retrospective.s3.amazonaws.com/index.html\n",
"bucket = 's3://noaa-gefs-retrospective'\n",
"fs_s3 = fsspec.filesystem('s3', anon=True)\n",
"\n",
"# Pieces of the object path that pick out which files to fetch\n",
"year = 2005\n",
"ens_member = 'c00'\n",
"period = 'Days:1-10'\n",
"varname = 'spfh_2m'\n",
"\n",
"# File-name variable -> short variable name\n",
"# (presumably the name cfgrib gives the variable in the dataset; confirm)\n",
"fvar_to_dvar = dict(\n",
"    tmp_2m='t2m',  # 2m temperature\n",
"    apcp_sfc='tp',  # Total precipitation\n",
"    dlwrf_sfc='dlwrf',  # Downward longwave radiation flux\n",
"    dswrf_sfc='dswrf',  # Downward shortwave radiation flux\n",
"    pres_sfc='pa',  # Surface pressure\n",
"    spfh_2m='sh2',  # 2m specific humidity\n",
")\n",
"ds_varname = fvar_to_dvar[varname]\n",
"\n",
"# Coordinate slices for the region of interest. Latitude runs 50 -> 30,\n",
"# presumably because the grid stores latitude in descending order; confirm.\n",
"wna_selectors = dict(\n",
"    latitude=slice(50, 30),\n",
"    longitude=slice(230, 275),\n",
")\n",
"# Indexer on the 'step' dimension\n",
"forecast_selectors = dict(step=0)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Fetch and concatenate the configured variable for the whole year\n",
"ds = get_gefs_data(\n",
"    fs_s3=fs_s3,\n",
"    bucket=bucket,\n",
"    year=year,\n",
"    ens_member=ens_member,\n",
"    period=period,\n",
"    varname=varname,\n",
"    selectors=wna_selectors,\n",
"    iselectors=forecast_selectors,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "noaa_owp",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment