Created
January 10, 2024 19:14
-
-
Save nocollier/52034b55a57ad81b3f86d55c511c8ea6 to your computer and use it in GitHub Desktop.
Possible CMIP test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""ESGF could consider maintaining a database of mean and standard deivations for each | |
variable in each experiment/table. There should be a public API to this so tools could | |
be created to test model data that is to be submitted. For example, a z-statistic could | |
be used: | |
z = | (submission_mean - esgf_mean) / sqrt(submission_std**2 + esgf_std**2) | | |
If z < 2, then the distributions are to be considered similar. Perhaps there are better | |
statistical tests that could be performed if ESGF does the work of providing these | |
important scalars. | |
""" | |
import numpy as np | |
from intake_esgf import ESGFCatalog | |
# Define some constants | |
VARIABLE = "tas" | |
NUM_TIME = 24 # only the last 2 years to keep it fast | |
# Use intake-esgf to find some data across sources | |
cat = ESGFCatalog().search( | |
experiment_id="historical", table_id="Amon", variable_id="tas" | |
) | |
cat.remove_ensembles() # only take the 'first' ensemble, just to keep it small | |
dsd = cat.to_dataset_dict(add_measures=False) # download into a big dictionary | |
# Now compute the mean and standard deviation of the variable in the last `NUM_TIME` | |
# time slices. This is what ESGF would provide already precomputed. | |
var_sum = 0.0 | |
var_sum_squared = 0.0 | |
n = 0 | |
for key, ds in dsd.items(): | |
da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load() | |
var_sum += da.sum() | |
var_sum_squared += (da * da).sum() | |
n += da.size | |
mean = var_sum / n | |
std = np.sqrt((var_sum_squared - var_sum * var_sum / n) / (n - 1)) | |
# Now loop back through the models and randomly pick one to have values that are in the | |
# wrong units. This is just meant to simulate what could go wrong. | |
bad_one = np.random.randint(low=0, high=len(dsd) - 1) | |
for i, (key, ds) in enumerate(dsd.items()): | |
da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load() - (i == bad_one) * 273.15 | |
mu = da.mean().values | |
sigma = da.std().values | |
z = np.abs((mu - mean) / np.sqrt(std * std + sigma * sigma)) | |
print(f"{key:>50} mean={mu:.2f} std={sigma:.2f} {z=:.2f}") | |
# All of the correct models have z ~ 0, but the wrong units z = 9. It is really easy to | |
# see a model that is anomalous. We could then highlight entries where z > threshold for | |
# the community to consider because they are significantly different than the rest. | |
""" | |
MRI.MRI-ESM2-0.r1i1p1f1.gn mean=279.04 std=21.10 z=0.01 | |
MPI-M.MPI-ESM1-2-HR.r1i1p1f1.gn mean=279.19 std=21.37 z=0.00 | |
MPI-M.ICON-ESM-LR.r1i1p1f1.gn mean=288.30 std=16.12 z=0.34 | |
NCAR.CESM2-FV2.r1i1p1f1.gn mean=5.62 std=21.90 z=9.02 <---- | |
SNU.SAM0-UNICON.r1i1p1f1.gn mean=276.58 std=22.81 z=0.09 | |
MPI-M.MPI-ESM1-2-LR.r1i1p1f1.gn mean=278.98 std=20.77 z=0.01 | |
NCAR.CESM2-WACCM-FV2.r1i1p1f1.gn mean=278.74 std=21.89 z=0.02 | |
NCAR.CESM2-WACCM.r1i1p1f1.gn mean=278.84 std=21.54 z=0.01 | |
NOAA-GFDL.GFDL-ESM4.r1i1p1f1.gr1 mean=278.21 std=21.85 z=0.03 | |
EC-Earth-Consortium.EC-Earth3-Veg-LR.r1i1p1f1.gr mean=279.48 std=19.98 z=0.01 | |
AWI.AWI-CM-1-1-MR.r1i1p1f1.gn mean=279.11 std=21.24 z=0.00 | |
AWI.AWI-ESM-1-1-LR.r1i1p1f1.gn mean=278.71 std=20.95 z=0.02 | |
AS-RCEC.TaiESM1.r1i1p1f1.gn mean=278.25 std=22.45 z=0.03 | |
CAS.FGOALS-g3.r1i1p1f1.gn mean=280.57 std=20.88 z=0.05 | |
MIROC.MIROC-ES2H.r1i1p1f2.gn mean=279.72 std=20.64 z=0.02 | |
CCCma.CanESM5-CanOE.r1i1p2f1.gn mean=279.23 std=21.61 z=0.00 | |
NASA-GISS.GISS-E3-G.r1i1p1f1.gr mean=278.51 std=20.27 z=0.02 | |
MOHC.HadGEM3-GC31-MM.r1i1p1f3.gn mean=279.09 std=21.21 z=0.00 | |
E3SM-Project.E3SM-1-0.r1i1p1f1.gr mean=278.57 std=20.80 z=0.02 | |
NCC.NorESM2-LM.r1i1p1f1.gn mean=278.81 std=21.58 z=0.01 | |
EC-Earth-Consortium.EC-Earth3-AerChem.r1i1p1f1.gr mean=279.77 std=19.76 z=0.02 | |
EC-Earth-Consortium.EC-Earth3.r1i1p1f1.gr mean=280.17 std=19.58 z=0.03 | |
NASA-GISS.GISS-E2-1-G.r1i1p1f1.gn mean=278.33 std=22.36 z=0.03 | |
FIO-QLNM.FIO-ESM-2-0.r1i1p1f1.gn mean=278.24 std=22.17 z=0.03 | |
EC-Earth-Consortium.EC-Earth3-CC.r1i1p1f1.gr mean=280.69 std=19.46 z=0.05 | |
EC-Earth-Consortium.EC-Earth3-Veg.r1i1p1f1.gr mean=279.86 std=19.82 z=0.02 | |
THU.CIESM.r1i1p1f1.gr mean=279.91 std=21.24 z=0.02 | |
CNRM-CERFACS.CNRM-CM6-1.r1i1p1f2.gr mean=277.88 std=21.95 z=0.04 | |
CMCC.CMCC-CM2-HR4.r1i1p1f1.gn mean=279.30 std=21.17 z=0.00 | |
NASA-GISS.GISS-E2-2-H.r1i1p1f1.gn mean=277.12 std=22.08 z=0.07 | |
NASA-GISS.GISS-E2-1-G-CC.r1i1p1f1.gn mean=278.47 std=22.18 z=0.02 | |
NCAR.CESM2.r1i1p1f1.gn mean=279.03 std=21.56 z=0.01 | |
NOAA-GFDL.GFDL-CM4.r1i1p1f1.gr1 mean=277.83 std=21.53 z=0.05 | |
NIMS-KMA.KACE-1-0-G.r1i1p1f1.gr mean=278.98 std=21.73 z=0.01 | |
HAMMOZ-Consortium.MPI-ESM-1-2-HAM.r1i1p1f1.gn mean=279.18 std=20.45 z=0.00 | |
NCC.NorESM2-MM.r1i1p1f1.gn mean=278.47 std=21.80 z=0.03 | |
INM.INM-CM5-0.r1i1p1f1.gr1 mean=278.23 std=21.56 z=0.03 | |
CNRM-CERFACS.CNRM-ESM2-1.r1i1p1f2.gr mean=278.85 std=21.38 z=0.01 | |
INM.INM-CM4-8.r1i1p1f1.gr1 mean=278.03 std=22.02 z=0.04 | |
CMCC.CMCC-ESM2.r1i1p1f1.gn mean=279.42 std=21.43 z=0.01 | |
IPSL.IPSL-CM6A-LR.r1i1p1f1.gr mean=278.04 std=22.22 z=0.04 | |
NASA-GISS.GISS-E2-1-H.r1i1p1f1.gn mean=278.94 std=21.59 z=0.01 | |
BCC.BCC-ESM1.r1i1p1f1.gn mean=279.53 std=21.94 z=0.01 | |
MOHC.HadGEM3-GC31-LL.r1i1p1f3.gn mean=278.70 std=21.84 z=0.02 | |
MIROC.MIROC6.r1i1p1f1.gn mean=281.09 std=20.11 z=0.06 | |
MIROC.MIROC-ES2L.r1i1p1f2.gn mean=281.24 std=19.00 z=0.07 | |
MOHC.UKESM1-0-LL.r1i1p1f2.gn mean=278.18 std=22.26 z=0.03 | |
KIOST.KIOST-ESM.r1i1p1f1.gr1 mean=278.18 std=20.73 z=0.04 | |
BCC.BCC-CSM2-MR.r1i1p1f1.gn mean=279.86 std=21.15 z=0.02 | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment