Last active
September 8, 2021 14:53
-
-
Save tommylees112/bd58dc077999aa3b6a383b3433037822 to your computer and use it in GitHub Desktop.
Regrid data for Ella
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xarray as xr | |
from pathlib import Path | |
from tqdm import tqdm | |
import xesmf as xe | |
def convert_to_same_grid(reference_ds: xr.Dataset, ds: xr.Dataset, method: str="nearest_s2d") -> xr.Dataset: | |
""" Use xEMSF package to regrid ds to the same grid as reference_ds """ | |
assert ("lat" in reference_ds.dims) & ( | |
"lon" in reference_ds.dims | |
), f"Need (lat,lon) in reference_ds dims Currently: {reference_ds.dims}" | |
assert ("lat" in ds.dims) & ( | |
"lon" in ds.dims | |
), f"Need (lat,lon) in ds dims Currently: {ds.dims}" | |
# create the grid you want to convert TO (from reference_ds) | |
ds_out = xr.Dataset( | |
{"lat": (["lat"], reference_ds.lat.values), "lon": (["lon"], reference_ds.lon.values)} | |
) | |
# create the regridder object | |
# xe.Regridder(grid_in, grid_out, method='bilinear') | |
regridder = xe.Regridder(ds, ds_out, method, reuse_weights=False) | |
# IF it's a dataarray just do the original transformations | |
if isinstance(ds, xr.core.dataarray.DataArray): | |
ds = regridder(ds) | |
# OTHERWISE loop through each of the variables, regrid the datarray then recombine into dataset | |
elif isinstance(ds, xr.core.dataset.Dataset): | |
vars = [i for i in ds.var().variables] | |
if len(vars) == 1: | |
ds = regridder(ds) | |
else: | |
output_dict = {} | |
# LOOP over each variable and append to dict | |
for var in vars: | |
print(f"- regridding var {var} -") | |
da = ds[var] | |
da = regridder(da) | |
output_dict[var] = da | |
# REBUILD | |
ds = xr.Dataset(output_dict) | |
else: | |
assert False, "This function only works with xarray dataset / dataarray objects" | |
print( | |
f"Regridded from {(regridder.shape_in)} to {(regridder.shape_out)}" | |
) | |
return ds | |
def get_varname_base(ds) -> str: | |
model = ds.attrs["model_id"].replace(" ", "_") | |
institute = ds.attrs["institute_id"].replace(" ", "_") | |
varname_base = f"{institute}_{model}" | |
return varname_base | |
def rename_vars(ds, varname_base) -> xr.Dataset: | |
rename_dict = dict(zip(list(ds.data_vars), [f"{varname_base}_{v}" for v in ds.data_vars])) | |
ds = ds.rename(rename_dict) | |
return ds | |
def drop_bnds_variables(ds) -> xr.Dataset: | |
return ds.drop([v for v in ds.data_vars if "bnds" in v]) | |
if __name__ == "__main__": | |
RENAME = False | |
# Which regridding algorithm? | |
# https://xesmf.readthedocs.io/en/latest/notebooks/Compare_algorithms.html | |
method: str = "nearest_s2d" | |
# change this to your data path | |
data_dir = Path("/home/leest/ella_data") | |
########### | |
out_dir = data_dir / "REGRID" | |
out_dir.mkdir(exist_ok=True) | |
ref_path = data_dir / "pr_Aclim_MRI-CGCM3_midHolocene.nc" | |
ref_ds = xr.open_dataset(ref_path) | |
ref_ds = drop_bnds_variables(ref_ds) | |
# datasets to regrid | |
other_datasets = [ | |
xr.open_dataset(f) for f in list(data_dir.glob("*.nc")) if f != ref_path | |
] | |
# clean up other_datasets (remove the bnds dimension) [NOTE: don't have to do this...] | |
clean_datasets = {} | |
for ds in other_datasets: | |
ds = drop_bnds_variables(ds) | |
# create unique variable name from the institue-model name | |
varname_base = get_varname_base(ds) | |
clean_datasets[varname_base] = ds | |
# select one timestep-variable (lat lon only) for the reference grid | |
reference_ds = ref_ds["pr"].isel(time=0) | |
regrid_data = {} | |
pbar = tqdm(clean_datasets.items(), desc="Regridding data") | |
for varname_base, ds in pbar: | |
pbar.set_postfix_str(varname_base) | |
ds_regrid = convert_to_same_grid(reference_ds, clean_datasets[varname_base], method=method) | |
# rename the variables so we can join into one dataset | |
if RENAME: | |
ds_regrid = rename_vars(ds_regrid, varname_base) | |
regrid_data[varname_base] = ds_regrid | |
# Do for the reference grid too | |
varname_base = get_varname_base(ref_ds) | |
regrid_data[varname_base] = rename_vars(ref_ds, varname_base) | |
# save each to netcdf | |
for varname, ds in regrid_data.items(): | |
ds.to_netcdf(out_dir / f"{varname}.nc") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment