Last active
November 21, 2020 23:19
-
-
Save naomi-henderson/5ee0bbb1a50e026c6b460a8d8d8a7416 to your computer and use it in GitHub Desktop.
Direct Access to CMIP6 in GCS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Direct Access to the CMIP6 data in our Google Public Collection" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import xarray as xr\n", | |
"import gcsfs" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Read Catalog" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# This is the master CMIP6 Google Cloud catalog.\n", | |
"# It is in CSV format, so load it into the notebook as a pandas DataFrame.\n", | |
"\n", | |
"df_cloud = pd.read_csv('https://cmip6.storage.googleapis.com/pangeo-cmip6.csv', dtype='unicode')\n", | |
"\n", | |
"print('number of datasets:',len(df_cloud))\n", | |
"df_cloud.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Subselect Catalog (dataframe)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_sub = df_cloud[(df_cloud.experiment_id=='historical')&(df_cloud.variable_id=='tas')]\n", | |
"df_sub = df_sub[df_sub.table_id.eq('Amon')]\n", | |
"print('number of datasets:',len(df_sub))\n", | |
"df_sub.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Look at one row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's look at one entry (row)\n", | |
"# see https://github.com/WCRP-CMIP/CMIP6_CVs for the column names (Controlled Vocabulary of CMIP6)\n", | |
"first_row = df_sub.iloc[0]\n", | |
"first_row" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Get URL to the `xarray` dataset in `zarr` format" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The most important column is the actual location of the dataset in Google Cloud Storage\n", | |
"\n", | |
"gsurl = first_row.zstore\n", | |
"variable = first_row.variable_id\n", | |
"print(gsurl, variable)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Connect to Google Cloud Storage" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# So let's look at one of these datasets. First we connect to Google Cloud Storage.\n", | |
"\n", | |
"fs = gcsfs.GCSFileSystem(token='anon', access='read_only')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Read the Dataset (this is 'lazy' - only reads what is needed for each request)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# and simply read the data: \n", | |
"\n", | |
"ds = xr.open_zarr(fs.get_mapper(gsurl),consolidated=True)\n", | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### More details" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Look at the 'Data variables:' section of the dataset summary for each variable's name and dimensions\n", | |
"\n", | |
"ds[variable].attrs\n", | |
"# Can also try: ds.attrs, ds.time.attrs, etc" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Make a quick plot of one time slice" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# What does the first time slice look like?\n", | |
"\n", | |
"ds[variable][0].plot()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "pangeo-fall2020", | |
"language": "python", | |
"name": "pangeo-fall2020" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment