Last active
May 24, 2021 12:22
-
-
Save leifdenby/de893d663c0643a0a0388583f93fd388 to your computer and use it in GitHub Desktop.
Example CMIP load from JASMIN
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Utility functions for constructing paths and finding files for CMIP6 data

Leif Denby, v3, 24/5/2021

Historical runs (1850-2014) are in
`<data_root>/CMIP/<institute>/<model>/historical/<variant>/<table_id>/<variable>/<grid>/<version>/`

Scenario runs are in
`<data_root>/ScenarioMIP/<institute>/<model>/<scenario_name>/<variant>/<table_id>/<variable>/<grid>/<version>/`

where `<data_root>` defaults to `/badc/cmip6/data/CMIP6/`.
"""
from pathlib import Path | |
from enum import Enum | |
# Computing centre (institute) for each model. The institute is one of the
# directory components of the CMIP6 archive path, so every supported model
# needs an entry here.
INSTITUTES = {
    "UKESM1-0-LL": "MOHC",
    "CNRM-CM6-1": "CNRM-CERFACS",
    "MRI-ESM2-0": "MRI",
    "GFDL-CM4": "NOAA-GFDL",
    "GISS-E2-1-G": "NASA-GISS",
    "HadGEM3-GC31-LL": "MOHC",
    "TaiESM1": "AS-RCEC",
    "AWI-CM-1-1-MR": "AWI",
    "BCC-CSM2-MR": "BCC",
    "BCC-ESM1": "BCC",
    "CAMS-CSM1-0": "CAMS",
    "FGOALS-f3-L": "CAS",
    "FGOALS-g3": "CAS",
    "CanESM5": "CCCma",
    "CNRM-ESM2-1": "CNRM-CERFACS",
    "ACCESS-ESM1-5": "CSIRO",
    "ACCESS-CM2": "CSIRO-ARCCSS",
    "FIO-ESM-2-0": "FIO-QLNM",
    "MPI-ESM-1-2-HAM": "HAMMOZ-Consortium",
    "INM-CM4-8": "INM",
    "INM-CM5-0": "INM",
    "IPSL-CM6A-LR": "IPSL",
    "MIROC6": "MIROC",
    "HadGEM3-GC31-MM": "MOHC",
    "MPI-ESM1-2-HR": "MPI-M",
    "MPI-ESM1-2-LR": "MPI-M",
    "CESM2": "NCAR",
    "NorCPM1": "NCC",
    "NorESM2-LM": "NCC",
    "NorESM2-MM": "NCC",
    "NESM3": "NUIST",
    "SAM0-UNICON": "SNU",
}
# Variant label used for each model - the first variant is used. The label
# has the form `r<N>i<N>p<N>f<N>` (realization, initialization, physics,
# forcing) and is one of the directory components of the archive path.
MODEL_VARIANTS = {
    "UKESM1-0-LL": "r1i1p1f2",
    "CNRM-CM6-1": "r1i1p1f2",
    "MRI-ESM2-0": "r1i1p1f1",
    "GFDL-CM4": "r1i1p1f1",
    "GISS-E2-1-G": "r1i1p1f1",
    "HadGEM3-GC31-LL": "r1i1p1f3",
    "TaiESM1": "r1i1p1f1",
    "AWI-CM-1-1-MR": "r1i1p1f1",
    "BCC-CSM2-MR": "r1i1p1f1",
    "BCC-ESM1": "r1i1p1f1",
    "CAMS-CSM1-0": "r1i1p1f1",
    "FGOALS-f3-L": "r1i1p1f1",
    "FGOALS-g3": "r1i1p1f1",
    "CanESM5": "r1i1p1f1",
    "CNRM-ESM2-1": "r1i1p1f2",
    "ACCESS-ESM1-5": "r1i1p1f1",
    "ACCESS-CM2": "r1i1p1f1",
    "FIO-ESM-2-0": "r1i1p1f1",
    "MPI-ESM-1-2-HAM": "r1i1p1f1",
    "INM-CM4-8": "r1i1p1f1",
    "INM-CM5-0": "r1i1p1f1",
    "IPSL-CM6A-LR": "r1i1p1f1",
    "MIROC6": "r1i1p1f1",
    "HadGEM3-GC31-MM": "r1i1p1f3",
    "MPI-ESM1-2-HR": "r1i1p1f1",
    "MPI-ESM1-2-LR": "r1i1p1f1",
    "CESM2": "r1i1p1f1",
    "NorCPM1": "r1i1p1f1",
    "NorESM2-LM": "r1i1p1f1",
    "NorESM2-MM": "r1i1p1f1",
    "NESM3": "r1i1p1f1",
    "SAM0-UNICON": "r1i1p1f1",
}
# Enum of the supported models, to make it easier to discover which models
# are available. Member names are the model names with "-" replaced by "_"
# (so that they are valid identifiers) and member values are the original
# model names, e.g. `Models.ACCESS_CM2.value == "ACCESS-CM2"`.
Models = Enum(
    "Models",
    [(model_name.replace("-", "_"), model_name) for model_name in INSTITUTES],
)
# Root directory under which the per-activity directories (`CMIP`,
# `ScenarioMIP`) are looked up by default
DEFAULT_DATA_ROOT = Path("/badc/cmip6/data/CMIP6/")

# Maps each supported experiment name to the activity directory (directly
# below the data root) that holds its data
SCENARIOS = {
    "historical": "CMIP",
    "ssp119": "ScenarioMIP",
    "ssp126": "ScenarioMIP",
    "ssp245": "ScenarioMIP",
    "ssp370": "ScenarioMIP",
    "ssp585": "ScenarioMIP",
}
def find_cmip6_file(
    model,
    variable,
    experiment,
    time_frequency="day",
    time_range="all",
    grid="best",
    version="latest",
    data_root=DEFAULT_DATA_ROOT,
):
    """
    Find the CMIP6 file(s) containing `variable` for a given `model` and
    `experiment`.

    Parameters
    ----------
    model : a member of `Models` or the model name as a string
        (e.g. `"UKESM1-0-LL"`); must be present in both `INSTITUTES` and
        `MODEL_VARIANTS`.
    variable : variable name, e.g. `tas`.
    experiment : one of the keys of `SCENARIOS`, e.g. `historical`, `ssp585`.
    time_frequency : used as the `table_id` path component, e.g. `day`, `3hr`.
    time_range : time-range part of the filename. With `"all"` (the default)
        a `*` wildcard is used so that every file in the directory for the
        variable is matched.
    grid : grid label. With `"best"` (the default) the grids `gr`, `gn` and
        `gr1` are tried in that order and the first one with data is used.
    version : version directory, e.g. `latest`.
    data_root : root directory of the archive (`Path` or `str`).

    Returns
    -------
    If the filename contains a wildcard (e.g. `time_range="all"`) a sorted
    list of the matching paths is returned, otherwise the single matching
    path.

    Raises
    ------
    NotImplementedError : if `model` or `experiment` isn't in the lookup
        tables of this module.
    ValueError : if `time_frequency` is None.
    FileNotFoundError : if none of the candidate paths exist.
    """
    if isinstance(model, Models):
        model = model.value

    if model not in INSTITUTES:
        raise NotImplementedError(f"Please add `{model}` to the list of institutes")
    institute = INSTITUTES[model]

    if model not in MODEL_VARIANTS:
        raise NotImplementedError(f"Please add `{model}` to the list of variants")
    variant = MODEL_VARIANTS[model]

    if experiment not in SCENARIOS:
        raise NotImplementedError(
            f"`{experiment}` isn't currently in the list of scenarios: "
            + ", ".join(SCENARIOS)
        )
    scenario = SCENARIOS[experiment]

    if time_frequency is None:
        # TODO: would be nice to provide a full list of the time-frequency
        # options available for the requested variable here
        raise ValueError(
            "You should provide a time-frequency that you want the variable on"
            " for example `day`, `3hr`"
        )
    table_id = time_frequency

    if time_range == "all":
        time_range = "*"

    # candidate grids in order of preference
    grids = ["gr", "gn", "gr1"] if grid == "best" else [grid]

    # now we construct the filepaths to try, one per candidate grid
    filepaths = [
        make_cmip6_filepath(
            institute=institute,
            model=model,
            scenario=scenario,
            variant=variant,
            experiment=experiment,
            table_id=table_id,
            variable=variable,
            version=version,
            time_range=time_range,
            grid=_grid,
            data_root=Path(data_root),
        )
        for _grid in grids
    ]

    for fp in filepaths:
        if "*" in fp.name:
            # only the filename may contain a wildcard, so glob within the
            # parent directory; sort so the result doesn't depend on the
            # order in which the filesystem lists the files
            matches = sorted(fp.parent.glob(fp.name))
            if matches:
                return matches
        elif fp.exists():
            return fp

    raise FileNotFoundError(
        "Couldn't find any files for the provided arguments. Tried: {}".format(
            ", ".join(str(fp) for fp in filepaths)
        )
    )
def make_cmip6_filepath(
    institute,
    model,
    scenario,
    variant,
    experiment,
    table_id,
    variable,
    version,
    time_range,
    grid,
    data_root=DEFAULT_DATA_ROOT,
):
    """
    Make a file path for a cmip6 dataset for a single variable

    The path is constructed as
    `<data_root>/<scenario>/<institute>/<model>/<experiment>/<variant>/<table_id>/<variable>/<grid>/<version>/`
    with the filename
    `<variable>_<table_id>_<model>_<experiment>_<variant>_<grid>_<time_range>.nc`.
    No check is made of whether the path exists.

    `scenario` is the directory directly below `data_root`, e.g. `CMIP` for
    historical runs or `ScenarioMIP` for future scenarios

    `experiment` is `historical` or a scenario name, which is likely to be
    one of ssp119, ssp126, ssp245, ssp370 or ssp585

    `variant` takes the form
    `r<realization_id>i<initialization_id>p<physics_id>f<forcing_id>`,
    e.g. `r1i1p1f2`, where the numbers are the indexes for:
    **r**ealization, **i**nitialization, **p**hysics, **f**orcing

    `table_id` generally indicates the frequency of the data, e.g. day, 3hr, Amon

    `grid` is the model grid being used, e.g. gn, where
        * `gm`: global mean data
        * `gn`: data reported on a model's native grid
        * `gr`: regridded data reported on the preferred target grid
        * `gr1`: regridded data reported on a grid other than the native grid
                 and other than the preferred target grid
    It is likely the `grid` will be the native grid, i.e. `gn`.

    `version` is normally in the form `v[YYYYMMDD]` or `latest`, e.g. `v20200203`

    `time_range` is the time-range part of the filename; it may be a glob
    pattern (e.g. `*`) if the returned path is going to be used for globbing

    `variable` generally follows the list on
    https://pcmdi.llnl.gov/mips/cmip3/variableList.html, for example
        `tas`: air_temperature
        `pr`: precipitation_flux
        `ts`: surface_temperature

    `data_root` is the root directory of the archive (`Path` or `str`)

    The following institutions have data in both historical and ScenarioMIPs:
    AS-RCEC, AWI, BCC, CAMS, CAS, CCCR-IITM, CCCma, CMCC, CNRM-CERFACS, CSIRO,
    CSIRO-ARCCSS, E3SM-Project, EC-Earth-Consortium, FIO-QLNM,
    HAMMOZ-Consortium, INM, IPSL, KIOST, MIROC, MOHC, MPI-M, MRI, NASA-GISS,
    NCAR, NCC, NIMS-KMA, NOAA-GFDL, NUIST, THU, UA

    Returns the full path (a `pathlib.Path`) to the file.
    """
    path = (
        # wrapping in Path allows `data_root` to be given as a plain string
        Path(data_root)
        / scenario
        / institute
        / model
        / experiment
        / variant
        / table_id
        / variable
        / grid
        / version
    )
    filename = (
        f"{variable}_{table_id}_{model}_{experiment}_{variant}_{grid}_{time_range}.nc"
    )
    return path / filename
def usage_examples():
    """
    Examples of how to look up CMIP6 files with `find_cmip6_file`.

    The lookups are done against the default data root, so the files need to
    be available on the local filesystem for the calls to succeed. From
    another module the same calls are made through the imported module
    instead, e.g. `<module>.find_cmip6_file(...)`.

    Returns the dataset opened with xarray for the last example.
    """
    # getting historical data, the model can be given either through the
    # `Models` enum or by name
    find_cmip6_file(
        model=Models.HadGEM3_GC31_LL,
        variable="tas",
        experiment="historical",
        time_frequency="day",
    )
    find_cmip6_file(
        model="UKESM1-0-LL",
        variable="tas",
        experiment="historical",
        time_frequency="day",
    )

    # getting future scenarios
    find_cmip6_file(
        model="UKESM1-0-LL",
        variable="tas",
        experiment="ssp585",
        time_frequency="day",
    )

    # the list of filepaths returned can be opened as one dataset with xarray
    # (imported here so that xarray is only required for this example)
    import xarray as xr

    ds = xr.open_mfdataset(
        find_cmip6_file(
            model="UKESM1-0-LL",
            variable="tas",
            experiment="ssp585",
            time_frequency="day",
        )
    )
    return ds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
you're right!