Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Created September 24, 2023 13:38
Show Gist options
  • Save rsignell-usgs/2157778e0d309bdae9b732ff255cac24 to your computer and use it in GitHub Desktop.
Save rsignell-usgs/2157778e0d309bdae9b732ff255cac24 to your computer and use it in GitHub Desktop.
coawst_file_size.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "nuclear-politics",
"metadata": {},
"source": [
"# Explore the size of the rechunked weekly NetCDF COAWST files"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "north-investment",
"metadata": {},
"outputs": [],
"source": [
"import fsspec\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "divine-robin",
"metadata": {},
"outputs": [],
"source": [
"# Directory searched below for the COAWST NetCDF files\n",
"prefix = '/proj/usgs/rsignell'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "disabled-facing",
"metadata": {},
"outputs": [],
"source": [
"# fsspec filesystem object for the 'file' protocol; used below for glob() and size()\n",
"fs = fsspec.filesystem(protocol='file')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "handmade-cover",
"metadata": {},
"outputs": [],
"source": [
"# Sorted list of every path under `prefix` matching coawst*.nc\n",
"# NOTE(review): the pattern is broad, so it also picks up any differently named coawst*.nc file -- confirm this is intended\n",
"flist = sorted(fs.glob(prefix + '/coawst*.nc'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "comic-bible",
"metadata": {},
"outputs": [],
"source": [
"# Size of each file in GB (bytes / 1e9), in the same order as `flist`\n",
"fsize = np.array([fs.size(path) for path in flist]) / 1e9"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "indian-display",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>file</th>\n",
" <th>size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_105_2011-08-26.nc</td>\n",
" <td>26.585513</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2009-08-21_0000.nc</td>\n",
" <td>8.751987</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2009-08-28_0001.nc</td>\n",
" <td>8.757312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2009-09-04_0002.nc</td>\n",
" <td>12.204467</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2009-09-11_0003.nc</td>\n",
" <td>12.162495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>731</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2023-08-25_0731.nc</td>\n",
" <td>26.402631</td>\n",
" </tr>\n",
" <tr>\n",
" <th>732</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2023-09-01_0732.nc</td>\n",
" <td>26.379377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>733</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2023-09-08_0733.nc</td>\n",
" <td>26.866770</td>\n",
" </tr>\n",
" <tr>\n",
" <th>734</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2023-09-15_0734.nc</td>\n",
" <td>28.534198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>735</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2023-09-22_0735.nc</td>\n",
" <td>24.439093</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>736 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" file size\n",
"0 /proj/usgs/rsignell/coawst_use_105_2011-08-26.nc 26.585513\n",
"1 /proj/usgs/rsignell/coawst_use_2009-08-21_0000.nc 8.751987\n",
"2 /proj/usgs/rsignell/coawst_use_2009-08-28_0001.nc 8.757312\n",
"3 /proj/usgs/rsignell/coawst_use_2009-09-04_0002.nc 12.204467\n",
"4 /proj/usgs/rsignell/coawst_use_2009-09-11_0003.nc 12.162495\n",
".. ... ...\n",
"731 /proj/usgs/rsignell/coawst_use_2023-08-25_0731.nc 26.402631\n",
"732 /proj/usgs/rsignell/coawst_use_2023-09-01_0732.nc 26.379377\n",
"733 /proj/usgs/rsignell/coawst_use_2023-09-08_0733.nc 26.866770\n",
"734 /proj/usgs/rsignell/coawst_use_2023-09-15_0734.nc 28.534198\n",
"735 /proj/usgs/rsignell/coawst_use_2023-09-22_0735.nc 24.439093\n",
"\n",
"[736 rows x 2 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raise the per-column display width limit to 100 characters\n",
"pd.set_option('display.max_colwidth', 100)\n",
"\n",
"# One row per file: its path and its size in GB\n",
"df = pd.DataFrame(dict(file=flist, size=fsize))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "thorough-abraham",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>file</th>\n",
" <th>size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2010-09-10_0055.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2010-11-19_0065.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2011-08-12_0103.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>203</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2013-07-12_0203.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>350</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2016-05-06_0350.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>352</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2016-05-20_0352.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>353</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2016-05-27_0353.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2016-06-03_0354.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>411</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2017-07-07_0411.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>525</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-09-13_0525.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>527</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-09-27_0527.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>529</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-10-11_0529.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>530</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-10-18_0530.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>531</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-10-25_0531.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>532</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-11-01_0532.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>533</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-11-08_0533.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>534</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-11-15_0534.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>535</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2019-11-22_0535.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>552</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2020-03-20_0552.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>553</th>\n",
" <td>/proj/usgs/rsignell/coawst_use_2020-03-27_0553.nc</td>\n",
" <td>0.087043</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" file size\n",
"56 /proj/usgs/rsignell/coawst_use_2010-09-10_0055.nc 0.087043\n",
"66 /proj/usgs/rsignell/coawst_use_2010-11-19_0065.nc 0.087043\n",
"104 /proj/usgs/rsignell/coawst_use_2011-08-12_0103.nc 0.087043\n",
"203 /proj/usgs/rsignell/coawst_use_2013-07-12_0203.nc 0.087043\n",
"350 /proj/usgs/rsignell/coawst_use_2016-05-06_0350.nc 0.087043\n",
"352 /proj/usgs/rsignell/coawst_use_2016-05-20_0352.nc 0.087043\n",
"353 /proj/usgs/rsignell/coawst_use_2016-05-27_0353.nc 0.087043\n",
"354 /proj/usgs/rsignell/coawst_use_2016-06-03_0354.nc 0.087043\n",
"411 /proj/usgs/rsignell/coawst_use_2017-07-07_0411.nc 0.087043\n",
"525 /proj/usgs/rsignell/coawst_use_2019-09-13_0525.nc 0.087043\n",
"527 /proj/usgs/rsignell/coawst_use_2019-09-27_0527.nc 0.087043\n",
"529 /proj/usgs/rsignell/coawst_use_2019-10-11_0529.nc 0.087043\n",
"530 /proj/usgs/rsignell/coawst_use_2019-10-18_0530.nc 0.087043\n",
"531 /proj/usgs/rsignell/coawst_use_2019-10-25_0531.nc 0.087043\n",
"532 /proj/usgs/rsignell/coawst_use_2019-11-01_0532.nc 0.087043\n",
"533 /proj/usgs/rsignell/coawst_use_2019-11-08_0533.nc 0.087043\n",
"534 /proj/usgs/rsignell/coawst_use_2019-11-15_0534.nc 0.087043\n",
"535 /proj/usgs/rsignell/coawst_use_2019-11-22_0535.nc 0.087043\n",
"552 /proj/usgs/rsignell/coawst_use_2020-03-20_0552.nc 0.087043\n",
"553 /proj/usgs/rsignell/coawst_use_2020-03-27_0553.nc 0.087043"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Files whose size is at most 0.1 GB, ordered by file name\n",
"# NOTE(review): such small files are presumably incomplete or placeholder weeks -- confirm\n",
"df[df['size'].le(0.1)].sort_values(by='file')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:pangeo]",
"language": "python",
"name": "conda-env-pangeo-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment