naomi-henderson · November 21, 2020 23:19
diff --git a/Basics0.ipynb b/Basics0.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Direct Access to the CMIP6 data in our Google Public Collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import xarray as xr\n",
    "import gcsfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read Catalog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the master CMIP6 Google Cloud catalog.\n",
    "# It is published in CSV format, so pandas can read it straight from the URL\n",
    "# into a DataFrame (dtype='unicode' reads every column as text).\n",
    "df_cloud = pd.read_csv(\n",
    "    'https://cmip6.storage.googleapis.com/pangeo-cmip6.csv',\n",
    "    dtype='unicode',\n",
    ")\n",
    "\n",
    "print('number of datasets:',len(df_cloud))\n",
    "df_cloud.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Subselect Catalog (dataframe)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the catalog rows that satisfy all three criteria at once\n",
    "df_sub = df_cloud[\n",
    "    df_cloud.experiment_id.eq('historical')\n",
    "    & df_cloud.variable_id.eq('tas')\n",
    "    & df_cloud.table_id.eq('Amon')\n",
    "]\n",
    "print('number of datasets:',len(df_sub))\n",
    "df_sub.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Look at one row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's inspect a single catalog entry (the first row of the subset).\n",
    "# The column names follow the Controlled Vocabulary of CMIP6:\n",
    "#   https://github.com/WCRP-CMIP/CMIP6_CVs\n",
    "first_row = df_sub.iloc[0]\n",
    "first_row"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get URL to the `xarray` dataset in `zarr` format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The most important column is zstore: the actual location of the dataset\n",
    "# in Google Cloud Storage. variable_id is kept alongside it so the dataset\n",
    "# can be indexed by that name later.\n",
    "gsurl, variable = first_row.zstore, first_row.variable_id\n",
    "print(gsurl, variable)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Connect to Google Cloud Storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# So let's look at one of these datasets.\n",
    "# First we connect to Google Cloud Storage, anonymously and read-only.\n",
    "fs = gcsfs.GCSFileSystem(\n",
    "    token='anon',\n",
    "    access='read_only',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read the Dataset  (this is 'lazy' - only reads what is needed for each request)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the zarr store found at gsurl through the filesystem's mapper\n",
    "ds = xr.open_zarr(\n",
    "    fs.get_mapper(gsurl),\n",
    "    consolidated=True,\n",
    ")\n",
    "ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### More details"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look at the 'Data variables:' section of the dataset summary for the\n",
    "# name and dimensions of its variables.\n",
    "# Below are the attributes of the selected variable.\n",
    "# You can also try: ds.attrs, ds.time.attrs, etc.\n",
    "ds[variable].attrs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Make a quick plot of one time slice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# What does the first time slice look like?\n",
    "# Select along the time dimension by name rather than by position, so the\n",
    "# slice does not depend on the order of the array's dimensions.\n",
    "# The trailing semicolon keeps the plot object's repr out of the cell output.\n",
    "ds[variable].isel(time=0).plot();"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pangeo-fall2020",
   "language": "python",
   "name": "pangeo-fall2020"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Direct Access to the CMIP6 data in our Google Public Collection"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import xarray as xr\n",
	"import gcsfs"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Read Catalog"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the master CMIP6 Google Cloud catalog.\n",
	"# It is published in CSV format, so pandas can read it straight from the URL\n",
	"# into a DataFrame (dtype='unicode' reads every column as text).\n",
	"df_cloud = pd.read_csv(\n",
	"    'https://cmip6.storage.googleapis.com/pangeo-cmip6.csv',\n",
	"    dtype='unicode',\n",
	")\n",
	"\n",
	"print('number of datasets:',len(df_cloud))\n",
	"df_cloud.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Subselect Catalog (dataframe)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Keep only the catalog rows that satisfy all three criteria at once\n",
	"df_sub = df_cloud[\n",
	"    df_cloud.experiment_id.eq('historical')\n",
	"    & df_cloud.variable_id.eq('tas')\n",
	"    & df_cloud.table_id.eq('Amon')\n",
	"]\n",
	"print('number of datasets:',len(df_sub))\n",
	"df_sub.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Look at one row"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Let's inspect a single catalog entry (the first row of the subset).\n",
	"# The column names follow the Controlled Vocabulary of CMIP6:\n",
	"#   https://github.com/WCRP-CMIP/CMIP6_CVs\n",
	"first_row = df_sub.iloc[0]\n",
	"first_row"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Get URL to the `xarray` dataset in `zarr` format"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# The most important column is zstore: the actual location of the dataset\n",
	"# in Google Cloud Storage. variable_id is kept alongside it so the dataset\n",
	"# can be indexed by that name later.\n",
	"gsurl, variable = first_row.zstore, first_row.variable_id\n",
	"print(gsurl, variable)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Connect to Google Cloud Storage"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# So let's look at one of these datasets.\n",
	"# First we connect to Google Cloud Storage, anonymously and read-only.\n",
	"fs = gcsfs.GCSFileSystem(\n",
	"    token='anon',\n",
	"    access='read_only',\n",
	")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Read the Dataset (this is 'lazy' - only reads what is needed for each request)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Open the zarr store found at gsurl through the filesystem's mapper\n",
	"ds = xr.open_zarr(\n",
	"    fs.get_mapper(gsurl),\n",
	"    consolidated=True,\n",
	")\n",
	"ds"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### More details"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Look at the 'Data variables:' section of the dataset summary for the\n",
	"# name and dimensions of its variables.\n",
	"# Below are the attributes of the selected variable.\n",
	"# You can also try: ds.attrs, ds.time.attrs, etc.\n",
	"ds[variable].attrs"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Make a quick plot of one time slice"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# What does the first time slice look like?\n",
	"# Select along the time dimension by name rather than by position, so the\n",
	"# slice does not depend on the order of the array's dimensions.\n",
	"# The trailing semicolon keeps the plot object's repr out of the cell output.\n",
	"ds[variable].isel(time=0).plot();"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "pangeo-fall2020",
	"language": "python",
	"name": "pangeo-fall2020"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}