Created
September 24, 2023 13:38
-
-
Save rsignell-usgs/2157778e0d309bdae9b732ff255cac24 to your computer and use it in GitHub Desktop.
coawst_file_size.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "nuclear-politics", | |
"metadata": {}, | |
"source": [ | |
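"# Explore the size of the rechunked weekly NetCDF COAWST files\n", | |
"\n", | |
"List the `coawst*.nc` files under `prefix`, tabulate each file's size in GB (bytes / 1e9), and then show the files that are 0.1 GB or smaller." | |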
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "north-investment", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import fsspec\n", | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "divine-robin", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"prefix= '/proj/usgs/rsignell'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "disabled-facing", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fs = fsspec.filesystem('file')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "handmade-cover", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"flist = sorted(fs.glob(f'{prefix}/coawst*.nc'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "comic-bible", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fsize = np.array([fs.size(f)/1e9 for f in flist]) # Size in GB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "indian-display", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_105_2011-08-26.nc</td>\n", | |
" <td>26.585513</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2009-08-21_0000.nc</td>\n", | |
" <td>8.751987</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2009-08-28_0001.nc</td>\n", | |
" <td>8.757312</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2009-09-04_0002.nc</td>\n", | |
" <td>12.204467</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2009-09-11_0003.nc</td>\n", | |
" <td>12.162495</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>731</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2023-08-25_0731.nc</td>\n", | |
" <td>26.402631</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>732</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2023-09-01_0732.nc</td>\n", | |
" <td>26.379377</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>733</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2023-09-08_0733.nc</td>\n", | |
" <td>26.866770</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>734</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2023-09-15_0734.nc</td>\n", | |
" <td>28.534198</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>735</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2023-09-22_0735.nc</td>\n", | |
" <td>24.439093</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>736 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file size\n", | |
"0 /proj/usgs/rsignell/coawst_use_105_2011-08-26.nc 26.585513\n", | |
"1 /proj/usgs/rsignell/coawst_use_2009-08-21_0000.nc 8.751987\n", | |
"2 /proj/usgs/rsignell/coawst_use_2009-08-28_0001.nc 8.757312\n", | |
"3 /proj/usgs/rsignell/coawst_use_2009-09-04_0002.nc 12.204467\n", | |
"4 /proj/usgs/rsignell/coawst_use_2009-09-11_0003.nc 12.162495\n", | |
".. ... ...\n", | |
"731 /proj/usgs/rsignell/coawst_use_2023-08-25_0731.nc 26.402631\n", | |
"732 /proj/usgs/rsignell/coawst_use_2023-09-01_0732.nc 26.379377\n", | |
"733 /proj/usgs/rsignell/coawst_use_2023-09-08_0733.nc 26.866770\n", | |
"734 /proj/usgs/rsignell/coawst_use_2023-09-15_0734.nc 28.534198\n", | |
"735 /proj/usgs/rsignell/coawst_use_2023-09-22_0735.nc 24.439093\n", | |
"\n", | |
"[736 rows x 2 columns]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame({'file': flist, 'size': fsize })\n", | |
"pd.options.display.max_colwidth=100\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "thorough-abraham", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>56</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2010-09-10_0055.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>66</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2010-11-19_0065.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>104</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2011-08-12_0103.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>203</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2013-07-12_0203.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>350</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2016-05-06_0350.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>352</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2016-05-20_0352.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>353</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2016-05-27_0353.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>354</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2016-06-03_0354.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>411</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2017-07-07_0411.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>525</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-09-13_0525.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>527</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-09-27_0527.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>529</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-10-11_0529.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>530</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-10-18_0530.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>531</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-10-25_0531.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>532</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-11-01_0532.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>533</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-11-08_0533.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>534</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-11-15_0534.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>535</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2019-11-22_0535.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>552</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2020-03-20_0552.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>553</th>\n", | |
" <td>/proj/usgs/rsignell/coawst_use_2020-03-27_0553.nc</td>\n", | |
" <td>0.087043</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file size\n", | |
"56 /proj/usgs/rsignell/coawst_use_2010-09-10_0055.nc 0.087043\n", | |
"66 /proj/usgs/rsignell/coawst_use_2010-11-19_0065.nc 0.087043\n", | |
"104 /proj/usgs/rsignell/coawst_use_2011-08-12_0103.nc 0.087043\n", | |
"203 /proj/usgs/rsignell/coawst_use_2013-07-12_0203.nc 0.087043\n", | |
"350 /proj/usgs/rsignell/coawst_use_2016-05-06_0350.nc 0.087043\n", | |
"352 /proj/usgs/rsignell/coawst_use_2016-05-20_0352.nc 0.087043\n", | |
"353 /proj/usgs/rsignell/coawst_use_2016-05-27_0353.nc 0.087043\n", | |
"354 /proj/usgs/rsignell/coawst_use_2016-06-03_0354.nc 0.087043\n", | |
"411 /proj/usgs/rsignell/coawst_use_2017-07-07_0411.nc 0.087043\n", | |
"525 /proj/usgs/rsignell/coawst_use_2019-09-13_0525.nc 0.087043\n", | |
"527 /proj/usgs/rsignell/coawst_use_2019-09-27_0527.nc 0.087043\n", | |
"529 /proj/usgs/rsignell/coawst_use_2019-10-11_0529.nc 0.087043\n", | |
"530 /proj/usgs/rsignell/coawst_use_2019-10-18_0530.nc 0.087043\n", | |
"531 /proj/usgs/rsignell/coawst_use_2019-10-25_0531.nc 0.087043\n", | |
"532 /proj/usgs/rsignell/coawst_use_2019-11-01_0532.nc 0.087043\n", | |
"533 /proj/usgs/rsignell/coawst_use_2019-11-08_0533.nc 0.087043\n", | |
"534 /proj/usgs/rsignell/coawst_use_2019-11-15_0534.nc 0.087043\n", | |
"535 /proj/usgs/rsignell/coawst_use_2019-11-22_0535.nc 0.087043\n", | |
"552 /proj/usgs/rsignell/coawst_use_2020-03-20_0552.nc 0.087043\n", | |
"553 /proj/usgs/rsignell/coawst_use_2020-03-27_0553.nc 0.087043" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.loc[df['size'] <= 0.1 ].sort_values('file')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:pangeo]", | |
"language": "python", | |
"name": "conda-env-pangeo-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.16" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment