Skip to content

Instantly share code, notes, and snippets.

@jbusecke
Last active May 3, 2022 21:17
Show Gist options
  • Save jbusecke/7cc72f4667bb5b23849a6db7d02d6d91 to your computer and use it in GitHub Desktop.
Save jbusecke/7cc72f4667bb5b23849a6db7d02d6d91 to your computer and use it in GitHub Desktop.
Pangeo Forge Recipes: Dynamically determine `subset_inputs` and `target_chunks` from cached files
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "b0b6eabe-b411-40b5-8687-642be7a7406c",
"metadata": {},
"source": [
"# Local testing for cmip6-feedstock\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a22aff8d-f27c-402c-bce7-3d23520cf947",
"metadata": {},
"outputs": [],
"source": [
"# !mamba install pangeo-forge-recipes -y"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a39acc93-ec5b-4c3a-acec-ea4ece2974c4",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=CCCma&source_id=CanESM5&experiment_id=historical&member_id=r1i1p1f1&table_id=Omon&variable_id=zos&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n",
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=CCCma&source_id=CanESM5&experiment_id=historical&member_id=r1i1p1f1&table_id=Omon&variable_id=so&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n",
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=MOHC&source_id=UKESM1-0-LL&experiment_id=historical&member_id=r1i1p1f2&table_id=SImon&variable_id=siitdconc&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n",
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=NOAA-GFDL&source_id=GFDL-CM4&experiment_id=historical&member_id=r1i1p1f1&table_id=SImon&variable_id=sithick&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n",
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=NOAA-GFDL&source_id=GFDL-CM4&experiment_id=historical&member_id=r1i1p1f1&table_id=SImon&variable_id=siconc&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n"
]
}
],
"source": [
"from pangeo_forge_recipes.recipes import setup_logging\n",
"from feedstock.recipe import recipe_4 as recipe # GFDL"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "42a04806-5264-4f87-b348-915bb34fc911",
"metadata": {},
"outputs": [],
"source": [
"# Surface DEBUG-level pangeo-forge-recipes log messages in the cell outputs below\n",
"setup_logging(\"DEBUG\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "02441b87-1356-496a-bc0e-1183ae5637c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching input 'Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n",
"pangeo_forge_recipes.storage - INFO - Caching file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc'\n",
"pangeo_forge_recipes.storage - INFO - Copying remote file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc' to cache\n",
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/3c23bdda4d0f36b2f70b33097928e091-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_185001-194912.nc\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f192b0>\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 10000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.02 min: 9.27 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 60000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.10 min: 10.19 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 100000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.19 min: 8.90 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 130000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.26 min: 8.46 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 160000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.35 min: 7.66 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 180000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.43 min: 6.90 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 200000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.51 min: 6.48 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 230000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.60 min: 6.41 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 270000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.68 min: 6.65 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 310000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.77 min: 6.73 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 350000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.85 min: 6.84 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 370000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.92 min: 6.70 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 400000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.01 min: 6.61 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 430000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.09 min: 6.56 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 460000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.17 min: 6.56 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 500000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.26 min: 6.63 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 540000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.36 min: 6.61 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 560000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.42 min: 6.55 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 590000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.52 min: 6.47 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 610000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.59 min: 6.39 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 640000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.70 min: 6.28 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 660000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.77 min: 6.21 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 680000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.84 min: 6.17 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 720000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.92 min: 6.26 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 770000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.02 min: 6.36 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 800000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.11 min: 6.33 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 820000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.17 min: 6.30 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 850000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.28 min: 6.22 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 860000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.34 min: 6.13 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 880000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.45 min: 5.98 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 890000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.51 min: 5.92 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 910000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.61 min: 5.82 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 930000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.67 min: 5.80 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 960000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.79 min: 5.74 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 970000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.84 min: 5.68 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 990000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.96 min: 5.58 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1000000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.01 min: 5.53 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1020000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.12 min: 5.46 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1040000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.20 min: 5.42 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1050000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.26 min: 5.37 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1070000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.39 min: 5.26 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1080000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.44 min: 5.24 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1110000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.52 min: 5.25 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1140000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.59 min: 5.29 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1180000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.69 min: 5.34 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1220000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.77 min: 5.40 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems done\n",
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)}): 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc'\n",
"pangeo_forge_recipes.storage - INFO - Opening 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc' from cache\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x7f6fc47929d0>\n",
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/3c23bdda4d0f36b2f70b33097928e091-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_185001-194912.nc\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener entering second context for <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - about to enter xr.open_dataset context on <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - successfully opened dataset\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - <xarray.Dataset>\n",
"Dimensions: (bnds: 2, time: 1200, y: 1080, x: 1440, xTe: 1441, yTe: 1081,\n",
" vertex: 4)\n",
"Coordinates:\n",
" * bnds (bnds) float64 1.0 2.0\n",
" * time (time) object 1850-01-16 12:00:00 ... 1949-12-16 12:00:00\n",
" * x (x) float64 -299.7 -299.5 -299.2 -299.0 ... 59.53 59.78 60.03\n",
" * xTe (xTe) float64 -299.8 -299.6 -299.3 -299.1 ... 59.66 59.91 60.16\n",
" * y (y) float64 -80.39 -80.31 -80.23 -80.15 ... 89.73 89.84 89.95\n",
" * yTe (yTe) float64 -80.43 -80.35 -80.27 -80.19 ... 89.78 89.89 90.0\n",
" lon (y, x) float32 ...\n",
" lat (y, x) float32 ...\n",
"Dimensions without coordinates: vertex\n",
"Data variables:\n",
" siconc (time, y, x) float32 ...\n",
" time_bnds (time, bnds) object ...\n",
" lat_bnds (y, x, vertex) float32 ...\n",
" lon_bnds (y, x, vertex) float32 ...\n",
"Attributes: (12/46)\n",
" history: File was processed by fremetar (GFDL analog of CM...\n",
" table_id: SImon\n",
" activity_id: CMIP\n",
" branch_method: standard\n",
" branch_time_in_child: 0.0\n",
" comment: <null ref>\n",
" ... ...\n",
" variable_id: siconc\n",
" variant_info: N/A\n",
" references: see further_info_url attribute\n",
" variant_label: r1i1p1f1\n",
" branch_time_in_parent: 36500.0\n",
" parent_time_units: days since 0001-1-1\n",
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching metadata for input 'Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener yielded\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n",
"pangeo_forge_recipes.storage - DEBUG - opener done\n",
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching input 'Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n",
"pangeo_forge_recipes.storage - INFO - Caching file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc'\n",
"pangeo_forge_recipes.storage - INFO - Copying remote file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc' to cache\n",
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/590bf49ef71cc09fcfbaf03224723d6e-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_195001-201412.nc\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 10000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.03 min: 6.58 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 50000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.10 min: 8.56 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 90000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.18 min: 8.55 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 130000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.27 min: 8.00 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 160000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.34 min: 7.91 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 200000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.45 min: 7.45 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 220000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.52 min: 7.06 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 240000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.60 min: 6.67 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 260000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.67 min: 6.47 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 290000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.76 min: 6.32 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 330000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.86 min: 6.41 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 360000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.94 min: 6.40 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 390000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.02 min: 6.37 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 420000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.11 min: 6.33 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 450000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.19 min: 6.30 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 480000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.27 min: 6.31 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 510000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.34 min: 6.32 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 540000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.45 min: 6.22 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 560000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.51 min: 6.17 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 590000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.60 min: 6.14 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 620000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.69 min: 6.13 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 650000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.77 min: 6.12 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 680000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.86 min: 6.08 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 690000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.92 min: 5.99 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 710000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.03 min: 5.82 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 720000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.09 min: 5.75 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 740000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.17 min: 5.68 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 770000000\n",
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.25 min: 5.70 MB/sec\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n",
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems done\n",
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)}): 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc'\n",
"pangeo_forge_recipes.storage - INFO - Opening 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc' from cache\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x7f6fc4792fa0>\n",
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/590bf49ef71cc09fcfbaf03224723d6e-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_195001-201412.nc\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener entering second context for <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - about to enter xr.open_dataset context on <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - successfully opened dataset\n",
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - <xarray.Dataset>\n",
"Dimensions: (bnds: 2, time: 780, y: 1080, x: 1440, xTe: 1441, yTe: 1081,\n",
" vertex: 4)\n",
"Coordinates:\n",
" * bnds (bnds) float64 1.0 2.0\n",
" * time (time) object 1950-01-16 12:00:00 ... 2014-12-16 12:00:00\n",
" * x (x) float64 -299.7 -299.5 -299.2 -299.0 ... 59.53 59.78 60.03\n",
" * xTe (xTe) float64 -299.8 -299.6 -299.3 -299.1 ... 59.66 59.91 60.16\n",
" * y (y) float64 -80.39 -80.31 -80.23 -80.15 ... 89.73 89.84 89.95\n",
" * yTe (yTe) float64 -80.43 -80.35 -80.27 -80.19 ... 89.78 89.89 90.0\n",
" lon (y, x) float32 ...\n",
" lat (y, x) float32 ...\n",
"Dimensions without coordinates: vertex\n",
"Data variables:\n",
" siconc (time, y, x) float32 ...\n",
" time_bnds (time, bnds) object ...\n",
" lat_bnds (y, x, vertex) float32 ...\n",
" lon_bnds (y, x, vertex) float32 ...\n",
"Attributes: (12/46)\n",
" history: File was processed by fremetar (GFDL analog of CM...\n",
" table_id: SImon\n",
" activity_id: CMIP\n",
" branch_method: standard\n",
" branch_time_in_child: 0.0\n",
" comment: <null ref>\n",
" ... ...\n",
" variable_id: siconc\n",
" variant_info: N/A\n",
" references: see further_info_url attribute\n",
" variant_label: r1i1p1f1\n",
" branch_time_in_parent: 36500.0\n",
" parent_time_units: days since 0001-1-1\n",
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching metadata for input 'Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n",
"pangeo_forge_recipes.storage - DEBUG - file_opener yielded\n",
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n",
"pangeo_forge_recipes.storage - DEBUG - opener done\n"
]
}
],
"source": [
"# Download every recipe input into the local cache up front,\n",
"# so the cached files can be inspected in the cells below.\n",
"for input_key in recipe.iter_inputs():\n",
"    recipe.cache_input(input_key)"
]
},
{
"cell_type": "markdown",
"id": "9729b9cb-afaf-47ff-9015-d074bbda7aff",
"metadata": {},
"source": [
"## Dynamically determine `subset_inputs`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "fbe5fcfa-8d2b-4ee6-9d08-2526c629836a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"801.356821\n",
"2.0\n",
"1249.376851\n",
"3.0\n"
]
}
],
"source": [
"# Derive the number of subsets per input from the on-disk size of each cached file.\n",
"# Every input is split into enough pieces that a single piece stays below MAX_SUBSET_BYTES.\n",
"import os\n",
"import pathlib\n",
"\n",
"MAX_SUBSET_BYTES = 500_000_000  # size threshold for a single subset (500 MB)\n",
"\n",
"cache_path = pathlib.Path(recipe.storage_config.cache.root_path)\n",
"\n",
"subset_inputs_list = []\n",
"for file in cache_path.iterdir():\n",
"    if not file.is_file():\n",
"        continue  # skip sub-directories; only regular files are measured\n",
"    file_size = os.path.getsize(file)\n",
"    # Integer floor-division keeps the count an `int` (a float divisor would yield e.g. 3.0).\n",
"    # TODO: confirm whether the count has to be capped at the number of elements along the concat dim.\n",
"    subset_inputs = file_size // MAX_SUBSET_BYTES + 1\n",
"    subset_inputs_list.append(subset_inputs)\n",
"    print(f\"{file_size / 1e6:.1f} MB -> {subset_inputs} subset(s)\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "5ba6804d-b3d2-40b5-9599-ae39f9efe018",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `subset_inputs` is a mapping {dimension name: number of subsets}, so split along the concat dim.\n",
"# Use the largest per-file count so that the biggest input is split finely enough;\n",
"# fall back to 1 (no splitting) if no cached files were found.\n",
"recipe.subset_inputs = {recipe.concat_dim: int(max(subset_inputs_list, default=1))}\n",
"recipe.subset_inputs"
]
},
{
"cell_type": "markdown",
"id": "1d655624-9129-4d9e-9d4e-93162abb2e71",
"metadata": {},
"source": [
"## Dynamically determine `target_chunks`\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "690c0ec0-81c7-464b-ae2d-e8ef9dab85a7",
"metadata": {},
"outputs": [],
"source": [
"import xarray as xr\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "8957dc6c-042b-42a0-8af1-b88d32eec9b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'time': 24}"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_chunk_size_range = (100e6, 200e6)  # in bytes; could eventually accept strings such as \"100MB\"/\"1GB\"\n",
"\n",
"# Sort so the example file is deterministic (directory iteration order is arbitrary).\n",
"example_file = sorted(cache_path.iterdir())[0]\n",
"\n",
"# Only the size metadata is needed; the context manager closes the file handle afterwards.\n",
"# NOTE: `ds.nbytes` counts every variable and coordinate, so this is an estimate.\n",
"with xr.open_dataset(example_file) as ds:\n",
"    bytes_per_dim_element = ds.nbytes / len(ds[recipe.concat_dim])\n",
"\n",
"# Aim for the middle of the target range, but never go below one element per chunk.\n",
"chunksize = max(1, int(np.round(np.mean(np.array(target_chunk_size_range) / bytes_per_dim_element))))\n",
"\n",
"# set the target_chunks\n",
"target_chunks = {recipe.concat_dim: chunksize}\n",
"recipe.target_chunks = target_chunks\n",
"recipe.target_chunks"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cfe2bcc-e1ae-4119-9fd1-7721f7b90e6b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment