Skip to content

Instantly share code, notes, and snippets.

@andersy005
Created March 24, 2021 22:48
Show Gist options
  • Select an option

  • Save andersy005/6b9daf38840a3b9b639208a11aaf9d24 to your computer and use it in GitHub Desktop.

Select an option

Save andersy005/6b9daf38840a3b9b639208a11aaf9d24 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"id": "voluntary-weekend",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pathlib\n",
"import xarray as xr\n",
"import cf_xarray "
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "conditional-revolution",
"metadata": {},
"outputs": [],
"source": [
"root = pathlib.Path(\"/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e21.BHISTcmip6.AAForcing.f09_g17.LE2-1281.001/\")\n",
"#root = pathlib.Path(\"/glade/campaign/cgd/cesm/CESM2-LE/archive/\")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "alternative-thong",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 34.4 ms, sys: 8.11 ms, total: 42.5 ms\n",
"Wall time: 41.7 ms\n"
]
},
{
"data": {
"text/plain": [
"6171"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"files = list(root.glob(\"**/*.nc\"))\n",
"#files = list(root.glob(\"b.e*/*/*/tseries/*/*.nc\"))\n",
"#all_files = list()\n",
"len(files)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "sublime-bracket",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e21.BHISTcmip6.AAForcing.f09_g17.LE2-1281.001/atm/proc/tseries/hour_3/b.e21.BHISTcmip6.AAForcing.f09_g17.LE2-1281.001.cam.h3.CLDLOW.2010010100-2014123100.nc')"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files[0]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "junior-combining",
"metadata": {},
"outputs": [],
"source": [
"def parse(file):\n",
" ds = xr.open_dataset(file, chunks={})\n",
" start_time, end_time = str(ds.time[0].data), str(ds.time[-1].data)\n",
" variable = file.parts[-1].split('.')[-3]\n",
" long_name = ds[variable].attrs.get('long_name', None)\n",
" units = ds[variable].attrs.get('units', None)\n",
" vertical_levels = 1\n",
" regions = {\n",
" 'atm': 'global',\n",
" 'ocn': 'global_ocean',\n",
" 'lnd': 'global_land',\n",
" 'ice_nh': 'artic_ocean',\n",
" 'ice_sh': 'antarctica',\n",
" }\n",
" \n",
" try:\n",
" vertical_levels = ds[ds.cf['vertical'].name].size\n",
" except KeyError:\n",
" pass\n",
" component = None\n",
" \n",
" spatial_domain = regions.get(component, None)\n",
" spatial_resolution = None\n",
" \n",
" return {'path': str(file), 'long_name': long_name, 'variable': variable, 'start_time': start_time, 'end_time': end_time, \n",
" 'vertical_levels': vertical_levels, 'units': units, 'stream': None, 'frequency': None, 'component': component, 'case': None, 'member_id': None, \n",
" 'experiment': None, 'spatial_domain': spatial_domain, 'spatial_resolution': spatial_resolution}"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "negative-panama",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.4 s, sys: 6.01 ms, total: 1.4 s\n",
"Wall time: 1.89 s\n"
]
}
],
"source": [
"%%time\n",
"items = list(map(parse, files[:10]))"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "excessive-rapid",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>path</th>\n",
" <th>long_name</th>\n",
" <th>variable</th>\n",
" <th>start_time</th>\n",
" <th>end_time</th>\n",
" <th>vertical_levels</th>\n",
" <th>units</th>\n",
" <th>stream</th>\n",
" <th>frequency</th>\n",
" <th>component</th>\n",
" <th>case</th>\n",
" <th>member_id</th>\n",
" <th>experiment</th>\n",
" <th>spatial_domain</th>\n",
" <th>spatial_resolution</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Vertically-integrated low cloud</td>\n",
" <td>CLDLOW</td>\n",
" <td>2010-01-01 00:00:00</td>\n",
" <td>2015-01-01 00:00:00</td>\n",
" <td>1</td>\n",
" <td>fraction</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Lowest model level zonal wind</td>\n",
" <td>UBOT</td>\n",
" <td>2010-01-01 00:00:00</td>\n",
" <td>2015-01-01 00:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Large-scale (stable) precipitation rate (liq +...</td>\n",
" <td>PRECL</td>\n",
" <td>2000-01-01 00:00:00</td>\n",
" <td>2009-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Convective precipitation rate (liq + ice)</td>\n",
" <td>PRECC</td>\n",
" <td>2000-01-01 00:00:00</td>\n",
" <td>2009-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Vertically-integrated low cloud</td>\n",
" <td>CLDLOW</td>\n",
" <td>2000-01-01 00:00:00</td>\n",
" <td>2009-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>fraction</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Convective precipitation rate (liq + ice)</td>\n",
" <td>PRECC</td>\n",
" <td>1990-01-01 00:00:00</td>\n",
" <td>1999-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Lowest model level meridional wind</td>\n",
" <td>VBOT</td>\n",
" <td>2010-01-01 00:00:00</td>\n",
" <td>2015-01-01 00:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Lowest model level meridional wind</td>\n",
" <td>VBOT</td>\n",
" <td>1990-01-01 00:00:00</td>\n",
" <td>1999-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Convective precipitation rate (liq + ice)</td>\n",
" <td>PRECC</td>\n",
" <td>2010-01-01 00:00:00</td>\n",
" <td>2015-01-01 00:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>/glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2...</td>\n",
" <td>Lowest model level meridional wind</td>\n",
" <td>VBOT</td>\n",
" <td>2000-01-01 00:00:00</td>\n",
" <td>2009-12-31 21:00:00</td>\n",
" <td>1</td>\n",
" <td>m/s</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" path \\\n",
"0 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"1 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"2 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"3 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"4 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"5 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"6 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"7 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"8 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"9 /glade/campaign/cgd/cesm/CESM2-LE/archive/b.e2... \n",
"\n",
" long_name variable \\\n",
"0 Vertically-integrated low cloud CLDLOW \n",
"1 Lowest model level zonal wind UBOT \n",
"2 Large-scale (stable) precipitation rate (liq +... PRECL \n",
"3 Convective precipitation rate (liq + ice) PRECC \n",
"4 Vertically-integrated low cloud CLDLOW \n",
"5 Convective precipitation rate (liq + ice) PRECC \n",
"6 Lowest model level meridional wind VBOT \n",
"7 Lowest model level meridional wind VBOT \n",
"8 Convective precipitation rate (liq + ice) PRECC \n",
"9 Lowest model level meridional wind VBOT \n",
"\n",
" start_time end_time vertical_levels units stream \\\n",
"0 2010-01-01 00:00:00 2015-01-01 00:00:00 1 fraction None \n",
"1 2010-01-01 00:00:00 2015-01-01 00:00:00 1 m/s None \n",
"2 2000-01-01 00:00:00 2009-12-31 21:00:00 1 m/s None \n",
"3 2000-01-01 00:00:00 2009-12-31 21:00:00 1 m/s None \n",
"4 2000-01-01 00:00:00 2009-12-31 21:00:00 1 fraction None \n",
"5 1990-01-01 00:00:00 1999-12-31 21:00:00 1 m/s None \n",
"6 2010-01-01 00:00:00 2015-01-01 00:00:00 1 m/s None \n",
"7 1990-01-01 00:00:00 1999-12-31 21:00:00 1 m/s None \n",
"8 2010-01-01 00:00:00 2015-01-01 00:00:00 1 m/s None \n",
"9 2000-01-01 00:00:00 2009-12-31 21:00:00 1 m/s None \n",
"\n",
" frequency component case member_id experiment spatial_domain \\\n",
"0 None None None None None None \n",
"1 None None None None None None \n",
"2 None None None None None None \n",
"3 None None None None None None \n",
"4 None None None None None None \n",
"5 None None None None None None \n",
"6 None None None None None None \n",
"7 None None None None None None \n",
"8 None None None None None None \n",
"9 None None None None None None \n",
"\n",
" spatial_resolution \n",
"0 None \n",
"1 None \n",
"2 None \n",
"3 None \n",
"4 None \n",
"5 None \n",
"6 None \n",
"7 None \n",
"8 None \n",
"9 None "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(items)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "southwest-thinking",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:playground]",
"language": "python",
"name": "conda-env-playground-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment