Skip to content

Instantly share code, notes, and snippets.

@tommylees112
Last active September 8, 2021 14:53
Show Gist options
  • Save tommylees112/bd58dc077999aa3b6a383b3433037822 to your computer and use it in GitHub Desktop.
Save tommylees112/bd58dc077999aa3b6a383b3433037822 to your computer and use it in GitHub Desktop.
Regrid data for Ella
import xarray as xr
from pathlib import Path
from tqdm import tqdm
import xesmf as xe
def convert_to_same_grid(reference_ds: xr.Dataset, ds: xr.Dataset, method: str="nearest_s2d") -> xr.Dataset:
""" Use xEMSF package to regrid ds to the same grid as reference_ds """
assert ("lat" in reference_ds.dims) & (
"lon" in reference_ds.dims
), f"Need (lat,lon) in reference_ds dims Currently: {reference_ds.dims}"
assert ("lat" in ds.dims) & (
"lon" in ds.dims
), f"Need (lat,lon) in ds dims Currently: {ds.dims}"
# create the grid you want to convert TO (from reference_ds)
ds_out = xr.Dataset(
{"lat": (["lat"], reference_ds.lat.values), "lon": (["lon"], reference_ds.lon.values)}
)
# create the regridder object
# xe.Regridder(grid_in, grid_out, method='bilinear')
regridder = xe.Regridder(ds, ds_out, method, reuse_weights=False)
# IF it's a dataarray just do the original transformations
if isinstance(ds, xr.core.dataarray.DataArray):
ds = regridder(ds)
# OTHERWISE loop through each of the variables, regrid the datarray then recombine into dataset
elif isinstance(ds, xr.core.dataset.Dataset):
vars = [i for i in ds.var().variables]
if len(vars) == 1:
ds = regridder(ds)
else:
output_dict = {}
# LOOP over each variable and append to dict
for var in vars:
print(f"- regridding var {var} -")
da = ds[var]
da = regridder(da)
output_dict[var] = da
# REBUILD
ds = xr.Dataset(output_dict)
else:
assert False, "This function only works with xarray dataset / dataarray objects"
print(
f"Regridded from {(regridder.shape_in)} to {(regridder.shape_out)}"
)
return ds
def get_varname_base(ds) -> str:
model = ds.attrs["model_id"].replace(" ", "_")
institute = ds.attrs["institute_id"].replace(" ", "_")
varname_base = f"{institute}_{model}"
return varname_base
def rename_vars(ds, varname_base) -> xr.Dataset:
rename_dict = dict(zip(list(ds.data_vars), [f"{varname_base}_{v}" for v in ds.data_vars]))
ds = ds.rename(rename_dict)
return ds
def drop_bnds_variables(ds) -> xr.Dataset:
return ds.drop([v for v in ds.data_vars if "bnds" in v])
if __name__ == "__main__":
RENAME = False
# Which regridding algorithm?
# https://xesmf.readthedocs.io/en/latest/notebooks/Compare_algorithms.html
method: str = "nearest_s2d"
# change this to your data path
data_dir = Path("/home/leest/ella_data")
###########
out_dir = data_dir / "REGRID"
out_dir.mkdir(exist_ok=True)
ref_path = data_dir / "pr_Aclim_MRI-CGCM3_midHolocene.nc"
ref_ds = xr.open_dataset(ref_path)
ref_ds = drop_bnds_variables(ref_ds)
# datasets to regrid
other_datasets = [
xr.open_dataset(f) for f in list(data_dir.glob("*.nc")) if f != ref_path
]
# clean up other_datasets (remove the bnds dimension) [NOTE: don't have to do this...]
clean_datasets = {}
for ds in other_datasets:
ds = drop_bnds_variables(ds)
# create unique variable name from the institue-model name
varname_base = get_varname_base(ds)
clean_datasets[varname_base] = ds
# select one timestep-variable (lat lon only) for the reference grid
reference_ds = ref_ds["pr"].isel(time=0)
regrid_data = {}
pbar = tqdm(clean_datasets.items(), desc="Regridding data")
for varname_base, ds in pbar:
pbar.set_postfix_str(varname_base)
ds_regrid = convert_to_same_grid(reference_ds, clean_datasets[varname_base], method=method)
# rename the variables so we can join into one dataset
if RENAME:
ds_regrid = rename_vars(ds_regrid, varname_base)
regrid_data[varname_base] = ds_regrid
# Do for the reference grid too
varname_base = get_varname_base(ref_ds)
regrid_data[varname_base] = rename_vars(ref_ds, varname_base)
# save each to netcdf
for varname, ds in regrid_data.items():
ds.to_netcdf(out_dir / f"{varname}.nc")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment