Last active
February 3, 2020 20:15
-
-
Save naomi-henderson/c8c59cc20e16c392f86bcb1b3c2a312a to your computer and use it in GitHub Desktop.
Making nice tables with pivot_table and selecting models with variable1 + variable2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# define a simple search on keywords\n", | |
"def search_df(df, verbose= False, **search):\n", | |
" \"search by keywords - if list, then match exactly, otherwise match as substring\"\n", | |
" keys = ['activity_id','institution_id','source_id','experiment_id','member_id', 'table_id', 'variable_id', 'grid_label']\n", | |
" d = df\n", | |
" for skey in search.keys():\n", | |
" \n", | |
" if isinstance(search[skey], str): # match a string as a substring\n", | |
" d = d[d[skey].str.contains(search[skey])]\n", | |
" else:\n", | |
" dk = []\n", | |
" for key in search[skey]: # match a list of strings exactly\n", | |
" dk += [d[d[skey]==key]]\n", | |
" d = pd.concat(dk)\n", | |
" keys.remove(skey)\n", | |
" if verbose:\n", | |
" for key in keys:\n", | |
" print(key,' = ',list(d[key].unique())) \n", | |
" return d" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dfp = search_df(df, experiment_id=['historical','1pctCO2'], table_id='Omon', variable_id=['tos','o2','chl','dissic'], grid_label=['gn'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dm = dfp[['experiment_id','source_id','variable_id','member_id',]].groupby(['experiment_id','source_id','variable_id']).nunique()[['member_id']]\n", | |
"\n", | |
"table = pd.DataFrame.pivot_table(dm, values='member_id', index=['experiment_id','source_id'],\n", | |
" columns=['variable_id'], aggfunc=np.sum, fill_value=0)\n", | |
"table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"table[(table.chl>0)&(table.dissic>0)&(table.tos>0)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "pangeo-Oct2019", | |
"language": "python", | |
"name": "pangeo-oct2019" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment