Skip to content

Instantly share code, notes, and snippets.

@naomi-henderson
Last active February 3, 2020 20:15
Show Gist options
  • Save naomi-henderson/c8c59cc20e16c392f86bcb1b3c2a312a to your computer and use it in GitHub Desktop.
Save naomi-henderson/c8c59cc20e16c392f86bcb1b3c2a312a to your computer and use it in GitHub Desktop.
Making nice tables with pivot_table and selecting models with variable1 + variable2
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# define a simple search on keywords\n",
"def search_df(df, verbose= False, **search):\n",
" \"search by keywords - if list, then match exactly, otherwise match as substring\"\n",
" keys = ['activity_id','institution_id','source_id','experiment_id','member_id', 'table_id', 'variable_id', 'grid_label']\n",
" d = df\n",
" for skey in search.keys():\n",
" \n",
" if isinstance(search[skey], str): # match a string as a substring\n",
" d = d[d[skey].str.contains(search[skey])]\n",
" else:\n",
" dk = []\n",
" for key in search[skey]: # match a list of strings exactly\n",
" dk += [d[d[skey]==key]]\n",
" d = pd.concat(dk)\n",
" keys.remove(skey)\n",
" if verbose:\n",
" for key in keys:\n",
" print(key,' = ',list(d[key].unique())) \n",
" return d"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfp = search_df(df, experiment_id=['historical','1pctCO2'], table_id='Omon', variable_id=['tos','o2','chl','dissic'], grid_label=['gn'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dm = dfp[['experiment_id','source_id','variable_id','member_id',]].groupby(['experiment_id','source_id','variable_id']).nunique()[['member_id']]\n",
"\n",
"table = pd.DataFrame.pivot_table(dm, values='member_id', index=['experiment_id','source_id'],\n",
" columns=['variable_id'], aggfunc=np.sum, fill_value=0)\n",
"table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"table[(table.chl>0)&(table.dissic>0)&(table.tos>0)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pangeo-Oct2019",
"language": "python",
"name": "pangeo-oct2019"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment