Skip to content

Instantly share code, notes, and snippets.

@andersy005
Last active March 18, 2019 02:13
Show Gist options
  • Select an option

  • Save andersy005/02c48c18896a06b018f37f5e7cab3acd to your computer and use it in GitHub Desktop.

Select an option

Save andersy005/02c48c18896a06b018f37f5e7cab3acd to your computer and use it in GitHub Desktop.
"""Aggregattion functions for building xarray datasets from search results.
"""
from functools import reduce
import xarray as xr
import pandas as pd
import dask
from tqdm.autonotebook import tqdm, trange
from datetime import datetime
def dict_union(*dicts, merge_keys=('history', 'further_info_url'),
               drop_keys=('DODS_EXTRA.Unlimited_Dimension',)):
    """Combine several attribute dictionaries into one.

    Dictionaries are folded pairwise from left to right. For each key of a
    pair:

    * keys listed in ``drop_keys`` are always left out;
    * a key whose value is missing (or ``None``) on one side takes the value
      from the other side;
    * equal values are kept;
    * differing values are joined with a newline (left value first) when the
      key is listed in ``merge_keys``;
    * differing values for any other key are dropped from the result.

    Parameters
    ----------
    *dicts : dict
        Dictionaries to combine.
    merge_keys : collection of str, optional
        Keys whose conflicting values are newline-joined instead of dropped.
        The values of these keys must be strings when they conflict.
    drop_keys : collection of str, optional
        Keys that never appear in the result.

    Returns
    -------
    dict
        An empty dict when called without dictionaries; the single input
        object itself (not a copy) when called with exactly one; otherwise a
        new mapping created with the type of the first dictionary. Keys
        appear in first-seen order: those of the left dictionary, then the
        keys only present in the right one.
    """
    if not dicts:
        return {}
    if len(dicts) == 1:
        return dicts[0]
    if len(dicts) > 2:
        # Fold explicitly so that custom merge_keys/drop_keys apply to every
        # pairwise step (a bare reduce() would fall back to the defaults).
        merged = dicts[0]
        for other in dicts[1:]:
            merged = dict_union(merged, other,
                                merge_keys=merge_keys, drop_keys=drop_keys)
        return merged

    d1, d2 = dicts
    d = type(d1)()
    # Deterministic, first-seen ordering instead of arbitrary set ordering.
    ordered_keys = list(d1) + [k for k in d2 if k not in d1]
    for k in ordered_keys:
        if k in drop_keys:
            continue
        v1 = d1.get(k)
        v2 = d2.get(k)
        if v1 is None and v2 is None:
            continue
        if v1 is None:
            d[k] = v2
        elif v2 is None:
            d[k] = v1
        elif v1 == v2:
            d[k] = v1
        elif k in merge_keys:
            d[k] = '\n'.join([v1, v2])
        # any other conflicting value is intentionally dropped
    return d
def set_bnds_as_coords(ds):
    """Promote bounds variables to coordinates.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose data variables are inspected.

    Returns
    -------
    xarray.Dataset
        Result of ``ds.set_coords`` applied to every data variable whose name
        contains ``'bnds'`` or ``'bounds'``.
    """
    bounds_names = []
    for name in ds.data_vars:
        if 'bnds' in name or 'bounds' in name:
            bounds_names.append(name)
    return ds.set_coords(bounds_names)
def fix_climatology_time(ds):
    """Rename dimensions that carry a ``climatology`` attribute.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to inspect.

    Returns
    -------
    xarray.Dataset
        Dataset in which every dimension ``dim`` whose ``ds[dim].attrs``
        contains the key ``'climatology'`` has been renamed to
        ``dim + '_climatology'``. The input object is returned unchanged when
        no dimension matches.
    """
    for dim_name in ds.dims:
        if 'climatology' not in ds[dim_name].attrs:
            continue
        # Give the climatological axis its own name.
        ds = ds.rename({dim_name: dim_name + '_climatology'})
    return ds
def set_coords(ds):
    """Keep only the main variable as a data variable.

    The main variable is the one named by the ``variable_id`` global
    attribute; every other data variable is turned into a coordinate. The
    result is then passed through ``fix_climatology_time``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``variable_id`` entry in ``ds.attrs``.

    Returns
    -------
    xarray.Dataset

    Raises
    ------
    KeyError
        If ``ds.attrs`` has no ``'variable_id'`` entry.
    """
    # There should only be one real variable per file; the rest are coords.
    primary = ds.attrs['variable_id']
    extras = {name for name in ds.data_vars if name != primary}
    return fix_climatology_time(ds.set_coords(extras))
def open_dataset(url, default_chunk_size='12MiB'):
    """Open one remote file as a dask-chunked dataset.

    Parameters
    ----------
    url : str
        Location handed to ``xarray.open_dataset``.
    default_chunk_size : str, optional
        Value used for dask's ``array.chunk-size`` setting while the file is
        opened; it controls how large the ``'auto'`` chunks along ``time``
        become.

    Returns
    -------
    xarray.Dataset
        Dataset opened with ``decode_times=False``, with its ``history``
        attribute replaced by a record of this call and its variables
        reorganised by ``set_coords``.
    """
    # Honour the caller's chunk size (previously the argument was ignored and
    # '12MiB' was always used).
    with dask.config.set({'array.chunk-size': default_chunk_size}):
        ds = xr.open_dataset(url, chunks={'time': 'auto'}, decode_times=False)
    # NOTE: this overwrites any history attribute stored in the file.
    ds.attrs['history'] = f"{datetime.now()} xarray.open_dataset('{url}')"
    ds = set_coords(ds)
    return ds
# ``open_dataset`` wrapped with ``dask.delayed``; ``combine_files`` collects
# the objects it returns and evaluates them together with ``dask.compute``.
open_dataset_delayed = dask.delayed(open_dataset)
def concat_timesteps(dsets, timevar='time'):
    """Concatenate datasets along the time dimension.

    Parameters
    ----------
    dsets : list of xarray.Dataset
        Datasets to join, in the order they should appear along ``timevar``.
    timevar : str, optional
        Name of the dimension to concatenate along.

    Returns
    -------
    xarray.Dataset
        ``dsets[0]`` itself when only one dataset is given. Otherwise the
        concatenation, with attributes merged by ``dict_union`` and a line
        appended to the ``history`` attribute.
    """
    if len(dsets) == 1:
        return dsets[0]

    merged_attrs = dict_union(*[ds.attrs for ds in dsets])

    def _without_static_coords(ds):
        # Non-dimension coordinates that do not vary along ``timevar``.
        candidates = set(ds.coords) - set(ds.dims)
        static = [name for name in candidates if timevar not in ds[name].dims]
        return ds.drop(static)

    # Time-independent non-dimension coordinates are kept only on the first
    # dataset; dropping them from the others simplifies the concatenation.
    pieces = [dsets[0]]
    pieces.extend([_without_static_coords(ds) for ds in dsets[1:]])

    combined = xr.concat(pieces, dim=timevar, coords='minimal')
    merged_attrs['history'] += f"\n{datetime.now()} xarray.concat(<ALL_TIMESTEPS>, dim='{timevar}', coords='minimal')"
    combined.attrs = merged_attrs
    return combined
def concat_ensembles(member_dsets, member_ids, join='outer'):
    """Stack ensemble members along a new ``member_id`` dimension.

    Parameters
    ----------
    member_dsets : list of xarray.Dataset
        One dataset per ensemble member.
    member_ids : list
        Labels for the new ``member_id`` dimension, in the same order as
        ``member_dsets``.
    join : str, optional
        Passed to ``xarray.align``. ``'inner'`` keeps only the overlapping
        segment of the members; ``'outer'`` gives the longest possible
        record.

    Returns
    -------
    xarray.Dataset
        ``member_dsets[0]`` itself (no ``member_id`` dimension is added) when
        there is a single member. Otherwise the concatenation, with
        attributes merged by ``dict_union`` and a line appended to the
        ``history`` attribute.

    Notes
    -----
    This function broke for the IPSL historical o3 variable because it
    contained a mix of frequencies (monthly and climatology); that was fixed
    by adding ``frequency="mon"`` to the search.
    """
    if len(member_dsets) == 1:
        return member_dsets[0]

    member_dim = xr.DataArray(member_ids, dims='member_id', name='member_id')
    merged_attrs = dict_union(*[ds.attrs for ds in member_dsets])

    # Members can have different lengths, so align them before stacking.
    aligned = xr.align(*member_dsets, join=join)

    # Only the first member keeps its coordinates, to simplify the merge.
    pieces = [aligned[0]]
    for other in aligned[1:]:
        pieces.append(other.reset_coords(drop=True))

    combined = xr.concat(pieces, dim=member_dim, coords='minimal')
    merged_attrs['history'] += f"\n{datetime.now()} xarray.concat(<ALL_MEMBERS>, dim='member_id', coords='minimal')"
    combined.attrs = merged_attrs
    return combined
def merge_vars(ds1, ds2):
    """Merge two datasets and keep their combined attributes.

    Designed to be applied repeatedly (two datasets at a time), e.g. through
    ``functools.reduce``.

    Parameters
    ----------
    ds1, ds2 : xarray.Dataset
        Datasets to merge. Variables of ``ds2`` that already exist among the
        non-dimension coordinates of ``ds1`` are dropped before merging.

    Returns
    -------
    xarray.Dataset
        ``xarray.merge`` of both inputs, with attributes set to
        ``dict_union(ds1.attrs, ds2.attrs)``.
    """
    merged_attrs = dict_union(ds1.attrs, ds2.attrs)

    # Non-dimension coordinates of the first dataset.
    aux_coords = set(ds1.coords) - set(ds1.dims)
    # Edge case: 'ps' is a coordinate in some datasets and a data variable in
    # its own dataset, so the copy in ds2 has to go.
    duplicated = aux_coords & set(ds2.variables)
    trimmed = ds2.drop(duplicated)

    merged = xr.merge([ds1, trimmed])
    merged.attrs = merged_attrs
    return merged
def merge_recursive(dsets):
    """Merge per-variable datasets into one and rechunk the result.

    Parameters
    ----------
    dsets : iterable of xarray.Dataset
        Datasets to fold together with ``merge_vars``. Each must carry
        ``history`` and ``further_info_url`` attributes.

    Returns
    -------
    xarray.Dataset
        Merged dataset with an extra ``history`` line, a de-duplicated
        ``further_info_url`` attribute, automatic chunks along ``time`` (when
        that dimension exists) and chunks of size 1 along ``member_id`` and
        ``time_climatology`` (when those dimensions exist).

    Notes
    -----
    When ``dsets`` holds a single dataset, ``reduce`` returns that object
    itself, so its ``attrs`` are modified in place.
    """
    dsm = reduce(merge_vars, dsets)
    dsm.attrs['history'] += f"\n{datetime.now()} xarray.merge(<ALL_VARIABLES>)"

    # De-duplicate further_info_url while keeping first-seen order, so the
    # attribute is identical from run to run (a set has no stable order).
    fi_urls = dict.fromkeys(dsm.attrs['further_info_url'].split('\n'))
    dsm.attrs['further_info_url'] = '\n'.join(fi_urls)

    # Rechunk. Only name dimensions that actually exist: Dataset.chunk
    # rejects keys that are not dimensions of the dataset.
    chunks = {}
    if 'time' in dsm.dims:
        chunks['time'] = 'auto'
    if 'member_id' in dsm.dims:
        chunks['member_id'] = 1
    if 'time_climatology' in dsm.dims:
        chunks['time_climatology'] = 1
    return dsm.chunk(chunks)
def combine_files(files):
    """Build one aggregated xarray dataset per ESGF dataset.

    Parameters
    ----------
    files : pandas.DataFrame
        Output from ``esgf_search``. The columns ``institution_id``,
        ``source_id``, ``experiment_id``, ``table_id``, ``grid_label``,
        ``variable_id``, ``member_id`` and ``OPENDAP_url`` are used.

    Returns
    -------
    all_dsets : dict
        Dictionary of xarray datasets. Keys are dataset IDs, formed by
        joining the five dataset-defining fields with ``'.'``.
    """
    # Fields which together define a single dataset.
    dataset_fields = ['institution_id', 'source_id', 'experiment_id', 'table_id', 'grid_label']

    all_dsets = {}
    for dset_keys, dset_files in tqdm(files.groupby(dataset_fields), desc='dataset'):
        dset_id = '.'.join(dset_keys)

        # Step 1: nested lists of delayed datasets, variable -> member -> file.
        delayed_by_var = []
        ids_by_var = []
        for _, var_files in dset_files.groupby('variable_id'):
            per_member = [
                (m_id, [open_dataset_delayed(url) for url in m_files.OPENDAP_url])
                for m_id, m_files in var_files.groupby('member_id')
            ]
            ids_by_var.append([m_id for m_id, _ in per_member])
            delayed_by_var.append([tasks for _, tasks in per_member])

        # Step 2: open every file of this dataset in one parallel compute.
        opened_by_var = dask.compute(*delayed_by_var, retries=5)

        # Step 3: join along time, then across members, then across variables.
        var_dsets = []
        progress = tqdm(opened_by_var, desc='ensemble', leave=False)
        for member_dsets, member_ids in zip(progress, ids_by_var):
            timelines = [concat_timesteps(time_dsets) for time_dsets in member_dsets]
            var_dsets.append(concat_ensembles(timelines, member_ids))

        all_dsets[dset_id] = merge_recursive(tqdm(var_dsets, desc='variables', leave=False))

    return all_dsets
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/srv/conda/lib/python3.7/site-packages/tqdm/autonotebook/__init__.py:14: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" \" (e.g. in jupyter console)\", TqdmExperimentalWarning)\n",
"/srv/conda/lib/python3.7/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n",
" defaults = yaml.load(f)\n"
]
}
],
"source": [
"import search, aggregate"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dataset_id</th>\n",
" <th>id</th>\n",
" <th>version</th>\n",
" <th>activity_drs</th>\n",
" <th>activity_id</th>\n",
" <th>branch_method</th>\n",
" <th>cf_standard_name</th>\n",
" <th>checksum</th>\n",
" <th>checksum_type</th>\n",
" <th>citation_url</th>\n",
" <th>...</th>\n",
" <th>variable_units</th>\n",
" <th>variant_label</th>\n",
" <th>_version_</th>\n",
" <th>retracted</th>\n",
" <th>_timestamp</th>\n",
" <th>score</th>\n",
" <th>HTTPServer_url</th>\n",
" <th>OPENDAP_url</th>\n",
" <th>GridFTP_url</th>\n",
" <th>Globus_url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>3dc95d0a4cdfbf78e958b63cc749979be9e0c3e8ac3330...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>1623072945311055872</td>\n",
" <td>False</td>\n",
" <td>2019-01-19T07:29:41.596Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>49556f443241595b440af101f6e5f039e067140d2f2974...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>1623072945285890048</td>\n",
" <td>False</td>\n",
" <td>2019-01-19T07:29:41.572Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>f9d8eb059bbaeb04462e593396ed4583074a910f384d7b...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>1623072945260724224</td>\n",
" <td>False</td>\n",
" <td>2019-01-19T07:29:41.549Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>c9137976143146c1652f3f044a4acf60dd5d12aee0fd15...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r1i1p1f1</td>\n",
" <td>1623072945236606976</td>\n",
" <td>False</td>\n",
" <td>2019-01-19T07:29:41.526Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>bac8c88c8f0b46edecefa92b37a8e8a35005a76edcb610...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r2i1p1f1</td>\n",
" <td>1625215405895188480</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:11.230Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>50490a19bef34820a43bf5ab52728ce45b98ed539355b7...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r2i1p1f1</td>\n",
" <td>1625215405870022656</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:11.206Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>0a656d4c86ea95df23b21428cd44594204778615fdd967...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r2i1p1f1</td>\n",
" <td>1625215405852196864</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:11.188Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>2b0e3d7bfcdd12521dd5587d08a4d78c7631f11ca9f579...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r2i1p1f1</td>\n",
" <td>1625215405832273920</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:11.170Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>a1ab16cdff51d001d0bc56bec1b06417aa904e75334517...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r3i1p1f1</td>\n",
" <td>1625215452825255936</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:55.986Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>46ac5458cefbcf9dfdf37f4e55e0cda00aa73a6a13ace6...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r3i1p1f1</td>\n",
" <td>1625215452800090112</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:55.961Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>c5f2a565b4efea9863e0f45f6eb3f57620befc46a25530...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r3i1p1f1</td>\n",
" <td>1625215452773875712</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:55.937Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>f985ca8b6f6cabc9f3ed420f54b5ede16b97812a5df94f...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r3i1p1f1</td>\n",
" <td>1625215452748709888</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:03:55.912Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>6f1a9b07d3417c366037ce02f042980d699d2b5f0c4809...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r4i1p1f1</td>\n",
" <td>1625215497701163008</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:04:38.783Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>8fdd08d03993e32b778736845ec48cdd23deb1e1bcc109...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r4i1p1f1</td>\n",
" <td>1625215497672851456</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:04:38.756Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>1c31d267240325944fb6efc05ec479f73d0478119ba3f7...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r4i1p1f1</td>\n",
" <td>1625215497639297024</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:04:38.724Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>515505434d7310d58580fac3893cc22b56291c09eb93c0...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r4i1p1f1</td>\n",
" <td>1625215497609936896</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:04:38.696Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>ba1c40f00c4b3fb703cadbf8c5f8377aa7a85bb4ccd875...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r5i1p1f1</td>\n",
" <td>1625215542206922752</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:05:21.227Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>5d0ff1d9f839f20f0d905dea9247cc8ad6b53f11ba4bfd...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r5i1p1f1</td>\n",
" <td>1625215542177562624</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:05:21.199Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>04f57cf15359cd8dced462ed5719b1910095d7d1ee6442...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r5i1p1f1</td>\n",
" <td>1625215542149251072</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:05:21.172Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1...</td>\n",
" <td>1</td>\n",
" <td>CMIP</td>\n",
" <td>CMIP</td>\n",
" <td>standard</td>\n",
" <td>surface_temperature</td>\n",
" <td>bdd8989353b4fe208c22a2859b07c7b66864c4b8a323d2...</td>\n",
" <td>SHA256</td>\n",
" <td>http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6....</td>\n",
" <td>...</td>\n",
" <td>K</td>\n",
" <td>r5i1p1f1</td>\n",
" <td>1625215542120939520</td>\n",
" <td>False</td>\n",
" <td>2019-02-11T23:05:21.145Z</td>\n",
" <td>1.0</td>\n",
" <td>http://aims3.llnl.gov/thredds/fileServer/css03...</td>\n",
" <td>http://aims3.llnl.gov/thredds/dodsC/css03_data...</td>\n",
" <td>gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM...</td>\n",
" <td>globus:415a6320-e49c-11e5-9798-22000b9da45e/cs...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20 rows × 56 columns</p>\n",
"</div>"
],
"text/plain": [
" dataset_id \\\n",
"0 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... \n",
"4 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... \n",
"8 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... \n",
"12 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... \n",
"16 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... \n",
"20 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... \n",
"24 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... \n",
"28 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... \n",
"32 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... \n",
"36 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... \n",
"0 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... \n",
"4 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... \n",
"8 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... \n",
"12 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... \n",
"16 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... \n",
"20 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... \n",
"24 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... \n",
"28 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... \n",
"32 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... \n",
"36 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... \n",
"\n",
" id version activity_drs \\\n",
"0 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... 1 CMIP \n",
"4 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... 1 CMIP \n",
"8 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... 1 CMIP \n",
"12 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1... 1 CMIP \n",
"16 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... 1 CMIP \n",
"20 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... 1 CMIP \n",
"24 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... 1 CMIP \n",
"28 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r2i1p1f1... 1 CMIP \n",
"32 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... 1 CMIP \n",
"36 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... 1 CMIP \n",
"0 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... 1 CMIP \n",
"4 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r3i1p1f1... 1 CMIP \n",
"8 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... 1 CMIP \n",
"12 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... 1 CMIP \n",
"16 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... 1 CMIP \n",
"20 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r4i1p1f1... 1 CMIP \n",
"24 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... 1 CMIP \n",
"28 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... 1 CMIP \n",
"32 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... 1 CMIP \n",
"36 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r5i1p1f1... 1 CMIP \n",
"\n",
" activity_id branch_method cf_standard_name \\\n",
"0 CMIP standard surface_temperature \n",
"4 CMIP standard surface_temperature \n",
"8 CMIP standard surface_temperature \n",
"12 CMIP standard surface_temperature \n",
"16 CMIP standard surface_temperature \n",
"20 CMIP standard surface_temperature \n",
"24 CMIP standard surface_temperature \n",
"28 CMIP standard surface_temperature \n",
"32 CMIP standard surface_temperature \n",
"36 CMIP standard surface_temperature \n",
"0 CMIP standard surface_temperature \n",
"4 CMIP standard surface_temperature \n",
"8 CMIP standard surface_temperature \n",
"12 CMIP standard surface_temperature \n",
"16 CMIP standard surface_temperature \n",
"20 CMIP standard surface_temperature \n",
"24 CMIP standard surface_temperature \n",
"28 CMIP standard surface_temperature \n",
"32 CMIP standard surface_temperature \n",
"36 CMIP standard surface_temperature \n",
"\n",
" checksum checksum_type \\\n",
"0 3dc95d0a4cdfbf78e958b63cc749979be9e0c3e8ac3330... SHA256 \n",
"4 49556f443241595b440af101f6e5f039e067140d2f2974... SHA256 \n",
"8 f9d8eb059bbaeb04462e593396ed4583074a910f384d7b... SHA256 \n",
"12 c9137976143146c1652f3f044a4acf60dd5d12aee0fd15... SHA256 \n",
"16 bac8c88c8f0b46edecefa92b37a8e8a35005a76edcb610... SHA256 \n",
"20 50490a19bef34820a43bf5ab52728ce45b98ed539355b7... SHA256 \n",
"24 0a656d4c86ea95df23b21428cd44594204778615fdd967... SHA256 \n",
"28 2b0e3d7bfcdd12521dd5587d08a4d78c7631f11ca9f579... SHA256 \n",
"32 a1ab16cdff51d001d0bc56bec1b06417aa904e75334517... SHA256 \n",
"36 46ac5458cefbcf9dfdf37f4e55e0cda00aa73a6a13ace6... SHA256 \n",
"0 c5f2a565b4efea9863e0f45f6eb3f57620befc46a25530... SHA256 \n",
"4 f985ca8b6f6cabc9f3ed420f54b5ede16b97812a5df94f... SHA256 \n",
"8 6f1a9b07d3417c366037ce02f042980d699d2b5f0c4809... SHA256 \n",
"12 8fdd08d03993e32b778736845ec48cdd23deb1e1bcc109... SHA256 \n",
"16 1c31d267240325944fb6efc05ec479f73d0478119ba3f7... SHA256 \n",
"20 515505434d7310d58580fac3893cc22b56291c09eb93c0... SHA256 \n",
"24 ba1c40f00c4b3fb703cadbf8c5f8377aa7a85bb4ccd875... SHA256 \n",
"28 5d0ff1d9f839f20f0d905dea9247cc8ad6b53f11ba4bfd... SHA256 \n",
"32 04f57cf15359cd8dced462ed5719b1910095d7d1ee6442... SHA256 \n",
"36 bdd8989353b4fe208c22a2859b07c7b66864c4b8a323d2... SHA256 \n",
"\n",
" citation_url ... variable_units \\\n",
"0 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"4 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"8 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"12 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"16 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"20 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"24 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"28 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"32 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"36 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"0 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"4 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"8 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"12 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"16 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"20 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"24 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"28 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"32 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"36 http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.... ... K \n",
"\n",
" variant_label _version_ retracted _timestamp \\\n",
"0 r1i1p1f1 1623072945311055872 False 2019-01-19T07:29:41.596Z \n",
"4 r1i1p1f1 1623072945285890048 False 2019-01-19T07:29:41.572Z \n",
"8 r1i1p1f1 1623072945260724224 False 2019-01-19T07:29:41.549Z \n",
"12 r1i1p1f1 1623072945236606976 False 2019-01-19T07:29:41.526Z \n",
"16 r2i1p1f1 1625215405895188480 False 2019-02-11T23:03:11.230Z \n",
"20 r2i1p1f1 1625215405870022656 False 2019-02-11T23:03:11.206Z \n",
"24 r2i1p1f1 1625215405852196864 False 2019-02-11T23:03:11.188Z \n",
"28 r2i1p1f1 1625215405832273920 False 2019-02-11T23:03:11.170Z \n",
"32 r3i1p1f1 1625215452825255936 False 2019-02-11T23:03:55.986Z \n",
"36 r3i1p1f1 1625215452800090112 False 2019-02-11T23:03:55.961Z \n",
"0 r3i1p1f1 1625215452773875712 False 2019-02-11T23:03:55.937Z \n",
"4 r3i1p1f1 1625215452748709888 False 2019-02-11T23:03:55.912Z \n",
"8 r4i1p1f1 1625215497701163008 False 2019-02-11T23:04:38.783Z \n",
"12 r4i1p1f1 1625215497672851456 False 2019-02-11T23:04:38.756Z \n",
"16 r4i1p1f1 1625215497639297024 False 2019-02-11T23:04:38.724Z \n",
"20 r4i1p1f1 1625215497609936896 False 2019-02-11T23:04:38.696Z \n",
"24 r5i1p1f1 1625215542206922752 False 2019-02-11T23:05:21.227Z \n",
"28 r5i1p1f1 1625215542177562624 False 2019-02-11T23:05:21.199Z \n",
"32 r5i1p1f1 1625215542149251072 False 2019-02-11T23:05:21.172Z \n",
"36 r5i1p1f1 1625215542120939520 False 2019-02-11T23:05:21.145Z \n",
"\n",
" score HTTPServer_url \\\n",
"0 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"4 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"8 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"12 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"16 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"20 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"24 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"28 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"32 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"36 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"0 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"4 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"8 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"12 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"16 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"20 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"24 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"28 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"32 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"36 1.0 http://aims3.llnl.gov/thredds/fileServer/css03... \n",
"\n",
" OPENDAP_url \\\n",
"0 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"4 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"8 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"12 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"16 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"20 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"24 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"28 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"32 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"36 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"0 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"4 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"8 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"12 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"16 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"20 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"24 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"28 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"32 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"36 http://aims3.llnl.gov/thredds/dodsC/css03_data... \n",
"\n",
" GridFTP_url \\\n",
"0 gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6... \n",
"4 gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6... \n",
"8 gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6... \n",
"12 gsiftp://aims3.llnl.gov:2811//css03_data/CMIP6... \n",
"16 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"20 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"24 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"28 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"32 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"36 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"0 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"4 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"8 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"12 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"16 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"20 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"24 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"28 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"32 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"36 gsiftp://aimsdtn3.llnl.gov:2811//css03_data/CM... \n",
"\n",
" Globus_url \n",
"0 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"4 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"8 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"12 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"16 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"20 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"24 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"28 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"32 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"36 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"0 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"4 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"8 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"12 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"16 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"20 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"24 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"28 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"32 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"36 globus:415a6320-e49c-11e5-9798-22000b9da45e/cs... \n",
"\n",
"[20 rows x 56 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files = search.esgf_search(mip_era='CMIP6', activity_drs='CMIP', variable=\"ts\",\n",
" table_id='Amon', institution_id='NASA-GISS', experiment_id='amip')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1.Amon.ts.gn.v20181016|aims3.llnl.gov'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files.iloc[0].dataset_id"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.amip.r1i1p1f1.Amon.ts.gn.v20181016.ts_Amon_GISS-E2-1-G_amip_r1i1p1f1_gn_185001-190012.nc|aims3.llnl.gov'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files.iloc[0].id"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'gn'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files.iloc[0].grid_label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
name: esgf
channels:
- conda-forge
- defaults
dependencies:
- python=3.7
- numpy
- dask
- xarray
- netcdf4
- zarr
- watermark
- requests
- tqdm
"""ESGF API Search Results to Pandas Dataframes
"""
from __future__ import print_function
import warnings
from tqdm.autonotebook import tqdm, trange
from datetime import datetime
import dask
import requests
import pandas as pd
from collections import OrderedDict
# API AT: https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API
def _check_doc_for_malformed_id(d):
source_id = d['source_id'][0]
expt_id = d['experiment_id'][0]
if not f"{source_id}_{expt_id}" in d['id']:
raise ValueError(f"Dataset id {d['id']} is malformed")
def _maybe_squeze_values(d):
def _maybe_squeeze(value):
if isinstance(value, str):
return value
try:
if len(value)==1:
return value[0]
except TypeError:
return(value)
return {k: _maybe_squeeze(v) for k, v in d.items()}
def _get_request(server, verbose=False, **payload):
    """Send a GET request to ``server`` and return the ``"response"`` payload.

    Parameters
    ----------
    server : str
        Base URL of the search endpoint.
    verbose : bool, optional
        If true, print the full request URL before sending it.
    **payload
        Query parameters, appended to the URL as ``key=value`` pairs.

    Returns
    -------
    The ``"response"`` entry of the decoded JSON body.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # Values are inserted verbatim (no percent-encoding): callers such as
    # esgf_search pass already-encoded values like "application%2Fsolr%2Bjson".
    query = "&".join("{}={}".format(k, v) for k, v in payload.items())
    url = "{}/?{}".format(server, query)
    if verbose:
        print(url)
    # The context manager closes the session's pooled connections.
    with requests.Session() as client:
        r = client.get(url)
        r.raise_for_status()
        return r.json()["response"]
def _get_page_dataframe(server, expected_size, offset=0,
                        filter_server_url=None, verbose=False,
                        **payload):
    """Fetch one page of search results and return it as a DataFrame.

    Parameters
    ----------
    server : str
        Base URL of the search endpoint.
    expected_size : int
        Number of documents the page must contain.
    offset : int, optional
        Offset of the page, forwarded to the request.
    filter_server_url : str or None, optional
        When given, only access URLs containing this substring are recorded.
    verbose : bool, optional
        Forwarded to ``_get_request``.
    **payload
        Remaining query parameters for the request.

    Returns
    -------
    pandas.DataFrame
        One row per well-formed document, with one ``<service>_url`` column
        per access URL that was kept.
    """
    response = _get_request(server, offset=offset, verbose=verbose, **payload)
    docs = response["docs"]
    assert len(docs) == expected_size

    records = []
    for doc in docs:
        # Documents with a malformed id are skipped.
        try:
            _check_doc_for_malformed_id(doc)
        except ValueError:
            continue

        record = OrderedDict(dataset_id=doc['dataset_id'], id=doc['id'])
        # Remove the raw url list first so it is not copied into the record.
        urls = doc.pop('url')
        record.update(_maybe_squeze_values(doc))

        for entry in urls:
            access_url, _mime_type, service_type = entry.split("|")
            if service_type == 'OPENDAP':
                access_url = access_url.replace('.html', '')
            if filter_server_url is not None and filter_server_url not in access_url:
                continue
            record[f'{service_type}_url'] = access_url

        records.append(record)

    return pd.DataFrame(records)
# Lazy counterpart of _get_page_dataframe: esgf_search uses it when
# delayed=True and resolves all pages with a single dask.compute call.
_get_page_dataframe_d = dask.delayed(_get_page_dataframe)
def _get_csrf_token(server):
    """Return the CSRF token cookie set by ``server``.

    Parameters
    ----------
    server : str
        URL to request in order to receive the cookie.

    Returns
    -------
    str
        Value of the ``csrftoken`` cookie if present, otherwise of the
        ``csrf`` cookie.

    Raises
    ------
    KeyError
        If the server sets neither cookie.
    """
    # The context manager closes the session's pooled connections.
    with requests.Session() as client:
        client.get(server)
        cookies = client.cookies
        if 'csrftoken' in cookies:
            # Django 1.6 and up
            return cookies['csrftoken']
        # older versions
        return cookies['csrf']
def esgf_search(server="https://esgf-node.llnl.gov/esg-search/search",
                project="CMIP6", page_size=10,
                # this option should not be necessary with local_node=True
                filter_server_url=None, local_node=True,
                verbose=False, format="application%2Fsolr%2Bjson",
                use_csrf=False, delayed=False, **search):
    """Search for files and return the results as a single DataFrame.

    Parameters
    ----------
    server : str, optional
        Base URL of the search endpoint.
    project : str, optional
        Sent as the ``project`` query parameter.
    page_size : int, optional
        Number of documents requested per page.
    filter_server_url : str or None, optional
        When given, only access URLs containing this substring are recorded.
    local_node : bool, optional
        If true, ``distrib=false`` is added to the query.
        NOTE(review): presumably restricts the search to the queried node;
        confirm against the ESGF search API documentation.
    verbose : bool, optional
        If true, every request URL is printed.
    format : str, optional
        Sent as the ``format`` query parameter. It must already be
        percent-encoded because values are placed in the URL verbatim.
    use_csrf : bool, optional
        If true, fetch a CSRF token from ``server`` and send it along.
    delayed : bool, optional
        If true, pages are fetched through ``dask.delayed`` and resolved
        with one ``dask.compute`` call.
    **search
        Additional query parameters (search facets).

    Returns
    -------
    pandas.DataFrame
        All pages concatenated, with duplicate ``checksum`` rows dropped.
        An empty DataFrame is returned when nothing was found.
    """
    payload = search
    payload["project"] = project
    payload["type"] = "File"
    if local_node:
        payload["distrib"] = "false"
    if use_csrf:
        payload["csrfmiddlewaretoken"] = _get_csrf_token(server)
    payload["format"] = format

    # The first request is only used to learn the total number of hits.
    init_resp = _get_request(server, offset=0, limit=page_size,
                             verbose=verbose, **payload)
    num_found = int(init_resp["numFound"])

    page_function = _get_page_dataframe_d if delayed else _get_page_dataframe

    all_frames = []
    for offset in range(0, num_found, page_size):
        # Every page is full except possibly the last one.
        expected_size = min(page_size, num_found - offset)
        df_d = page_function(server, expected_size, limit=page_size,
                             offset=offset, verbose=verbose,
                             filter_server_url=filter_server_url,
                             **payload)
        all_frames.append(df_d)

    if delayed:
        all_frames = dask.compute(*all_frames)

    # pd.concat raises on an empty sequence, so handle "no hits" explicitly.
    if not all_frames:
        return pd.DataFrame()

    result = pd.concat(all_frames)
    # Pages made up only of skipped documents yield frames without columns.
    if 'checksum' not in result.columns:
        return result
    # dropping duplicates on checksum removes all identical files
    return result.drop_duplicates(subset='checksum')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment