Created
October 2, 2024 17:51
-
-
Save arbennett/22419d69920037e1623bf02e8814bde6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import dask\n", | |
"import fsspec\n", | |
"import tempfile\n", | |
"import xarray as xr\n", | |
"import pandas as pd\n", | |
"from pathlib import Path\n", | |
"\n", | |
"from tqdm.autonotebook import tqdm\n", | |
"from dask.distributed import Client, LocalCluster\n", | |
"from dask.diagnostics import ProgressBar\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cluster = LocalCluster(n_workers=4, threads_per_worker=1, dashboard_address=4321)\n", | |
"client = Client(cluster)\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def assemble_date_uris(\n", | |
" fs_s3: fsspec.AbstractFileSystem,\n", | |
" bucket: str,\n", | |
" year: int,\n", | |
" ens_member: str,\n", | |
" period: str,\n", | |
" varname: str\n", | |
"):\n", | |
" baseline_url = f'{bucket}/GEFSv12/reforecast/{year}'\n", | |
" dates = fs_s3.glob(f'{baseline_url}/??????????/')\n", | |
" dates = [Path(date).name for date in dates]\n", | |
" all_uris = [\n", | |
" f'{baseline_url}/{date}/{ens_member}/{period}/{varname}_{date}_{ens_member}.grib2'\n", | |
" for date in dates\n", | |
" ]\n", | |
" return all_uris\n", | |
"\n", | |
"\n", | |
"@dask.delayed\n", | |
"def get_ds_from_uri(uri, selectors, iselectors):\n", | |
" filename = uri.split('/')[-1]\n", | |
" tmpdir = tempfile.TemporaryDirectory()\n", | |
" with fsspec.open(uri, s3={'anon': True}) as f:\n", | |
" with open(f'{tmpdir.name}/{filename}', 'wb') as g:\n", | |
" g.write(f.read())\n", | |
" ds = xr.open_dataset(f'{tmpdir.name}/{filename}', engine='cfgrib')\n", | |
" ds = ds.sel(**selectors).isel(**iselectors).load()\n", | |
" return ds\n", | |
"\n", | |
"\n", | |
"def get_gefs_data(\n", | |
" fs_s3,\n", | |
" bucket,\n", | |
" year, \n", | |
" ens_member, \n", | |
" period, \n", | |
" varname, \n", | |
" selectors, \n", | |
" iselectors,\n", | |
"):\n", | |
" uris = assemble_date_uris(\n", | |
" fs_s3, bucket, year, ens_member, period, varname\n", | |
" )\n", | |
" ds_list = dask.compute(\n", | |
" [get_ds_from_uri(uri, selectors, iselectors) for uri in uris]\n", | |
" )[0]\n", | |
" ds = xr.concat(ds_list, dim='time')\n", | |
" return ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# For the GEFS data, we will use the following S3 bucket:\n", | |
"# https://noaa-gefs-retrospective.s3.amazonaws.com/index.html\n", | |
"fs_s3 = fsspec.filesystem('s3', anon = True)\n", | |
"bucket = 's3://noaa-gefs-retrospective'\n", | |
"\n", | |
"# Other metadata used to access the data\n", | |
"year = 2005\n", | |
"ens_member = 'c00'\n", | |
"period = 'Days:1-10'\n", | |
"varname = 'spfh_2m'\n", | |
"\n", | |
"fvar_to_dvar = {\n", | |
" 'tmp_2m': 't2m', # 2m temperature\n", | |
" 'apcp_sfc': 'tp', # Total precipitation\n", | |
" 'dlwrf_sfc': 'dlwrf', # Downward longwave radiation flux\n", | |
" 'dswrf_sfc': 'dswrf', # Downward shortwave radiation flux\n", | |
" 'pres_sfc': 'pa', # Surface pressure\n", | |
" 'spfh_2m': 'sh2', # 2m specific humidity\n", | |
"}\n", | |
"wna_selectors = {\n", | |
" 'latitude': slice(50, 30),\n", | |
" 'longitude': slice(230, 275),\n", | |
"}\n", | |
"forecast_selectors = {\n", | |
" 'step': 0\n", | |
"}\n", | |
"ds_varname = fvar_to_dvar[varname]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds = get_gefs_data(\n", | |
" fs_s3,\n", | |
" bucket,\n", | |
" year, \n", | |
" ens_member, \n", | |
" period, \n", | |
" varname, \n", | |
" wna_selectors, \n", | |
" forecast_selectors,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Show the notebook repr of the combined dataset.\n", | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "noaa_owp", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment