Last active
May 3, 2022 21:17
-
-
Save jbusecke/7cc72f4667bb5b23849a6db7d02d6d91 to your computer and use it in GitHub Desktop.
Pangeo Forge Recipes: Dynamically determine `subset_inputs` and `target_chunks` from cached files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "b0b6eabe-b411-40b5-8687-642be7a7406c", | |
"metadata": {}, | |
"source": [ | |
"# Local testing for cmip6-feedstock\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "a22aff8d-f27c-402c-bce7-3d23520cf947", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# !mamba install pangeo-forge-recipes -y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "a39acc93-ec5b-4c3a-acec-ea4ece2974c4", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=CCCma&source_id=CanESM5&experiment_id=historical&member_id=r1i1p1f1&table_id=Omon&variable_id=zos&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n", | |
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=CCCma&source_id=CanESM5&experiment_id=historical&member_id=r1i1p1f1&table_id=Omon&variable_id=so&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n", | |
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=MOHC&source_id=UKESM1-0-LL&experiment_id=historical&member_id=r1i1p1f2&table_id=SImon&variable_id=siitdconc&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n", | |
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=NOAA-GFDL&source_id=GFDL-CM4&experiment_id=historical&member_id=r1i1p1f1&table_id=SImon&variable_id=sithick&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n", | |
"https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&institution_id=NOAA-GFDL&source_id=GFDL-CM4&experiment_id=historical&member_id=r1i1p1f1&table_id=SImon&variable_id=siconc&grid_label=gn&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&limit=500&offset=0\n" | |
] | |
} | |
], | |
"source": [ | |
"from pangeo_forge_recipes.recipes import setup_logging\n", | |
"from feedstock.recipe import recipe_4 as recipe # GFDL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "42a04806-5264-4f87-b348-915bb34fc911", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"setup_logging('DEBUG')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "02441b87-1356-496a-bc0e-1183ae5637c4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching input 'Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n", | |
"pangeo_forge_recipes.storage - INFO - Caching file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc'\n", | |
"pangeo_forge_recipes.storage - INFO - Copying remote file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc' to cache\n", | |
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/3c23bdda4d0f36b2f70b33097928e091-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_185001-194912.nc\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f192b0>\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 10000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.02 min: 9.27 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 60000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.10 min: 10.19 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 100000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.19 min: 8.90 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 130000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.26 min: 8.46 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 160000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.35 min: 7.66 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 180000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.43 min: 6.90 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 200000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.51 min: 6.48 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 230000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.60 min: 6.41 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 270000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.68 min: 6.65 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 310000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.77 min: 6.73 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 350000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.85 min: 6.84 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 370000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.92 min: 6.70 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 400000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.01 min: 6.61 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 430000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.09 min: 6.56 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 460000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.17 min: 6.56 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 500000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.26 min: 6.63 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 540000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.36 min: 6.61 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 560000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.42 min: 6.55 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 590000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.52 min: 6.47 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 610000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.59 min: 6.39 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 640000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.70 min: 6.28 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 660000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.77 min: 6.21 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 680000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.84 min: 6.17 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 720000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.92 min: 6.26 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 770000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.02 min: 6.36 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 800000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.11 min: 6.33 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 820000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.17 min: 6.30 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 850000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.28 min: 6.22 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 860000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.34 min: 6.13 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 880000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.45 min: 5.98 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 890000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.51 min: 5.92 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 910000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.61 min: 5.82 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 930000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.67 min: 5.80 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 960000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.79 min: 5.74 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 970000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.84 min: 5.68 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 990000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.96 min: 5.58 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1000000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.01 min: 5.53 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1020000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.12 min: 5.46 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1040000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.20 min: 5.42 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1050000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.26 min: 5.37 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1070000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.39 min: 5.26 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1080000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.44 min: 5.24 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1110000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.52 min: 5.25 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1140000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.59 min: 5.29 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1180000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.69 min: 5.34 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 1220000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 3.77 min: 5.40 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems done\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)}): 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc'\n", | |
"pangeo_forge_recipes.storage - INFO - Opening 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_185001-194912.nc' from cache\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x7f6fc47929d0>\n", | |
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/3c23bdda4d0f36b2f70b33097928e091-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_185001-194912.nc\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener entering second context for <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - about to enter xr.open_dataset context on <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19190>\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - successfully opened dataset\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - <xarray.Dataset>\n", | |
"Dimensions: (bnds: 2, time: 1200, y: 1080, x: 1440, xTe: 1441, yTe: 1081,\n", | |
" vertex: 4)\n", | |
"Coordinates:\n", | |
" * bnds (bnds) float64 1.0 2.0\n", | |
" * time (time) object 1850-01-16 12:00:00 ... 1949-12-16 12:00:00\n", | |
" * x (x) float64 -299.7 -299.5 -299.2 -299.0 ... 59.53 59.78 60.03\n", | |
" * xTe (xTe) float64 -299.8 -299.6 -299.3 -299.1 ... 59.66 59.91 60.16\n", | |
" * y (y) float64 -80.39 -80.31 -80.23 -80.15 ... 89.73 89.84 89.95\n", | |
" * yTe (yTe) float64 -80.43 -80.35 -80.27 -80.19 ... 89.78 89.89 90.0\n", | |
" lon (y, x) float32 ...\n", | |
" lat (y, x) float32 ...\n", | |
"Dimensions without coordinates: vertex\n", | |
"Data variables:\n", | |
" siconc (time, y, x) float32 ...\n", | |
" time_bnds (time, bnds) object ...\n", | |
" lat_bnds (y, x, vertex) float32 ...\n", | |
" lon_bnds (y, x, vertex) float32 ...\n", | |
"Attributes: (12/46)\n", | |
" history: File was processed by fremetar (GFDL analog of CM...\n", | |
" table_id: SImon\n", | |
" activity_id: CMIP\n", | |
" branch_method: standard\n", | |
" branch_time_in_child: 0.0\n", | |
" comment: <null ref>\n", | |
" ... ...\n", | |
" variable_id: siconc\n", | |
" variant_info: N/A\n", | |
" references: see further_info_url attribute\n", | |
" variant_label: r1i1p1f1\n", | |
" branch_time_in_parent: 36500.0\n", | |
" parent_time_units: days since 0001-1-1\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching metadata for input 'Index({DimIndex(name='time', index=0, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - opener done\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching input 'Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n", | |
"pangeo_forge_recipes.storage - INFO - Caching file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc'\n", | |
"pangeo_forge_recipes.storage - INFO - Copying remote file 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc' to cache\n", | |
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/590bf49ef71cc09fcfbaf03224723d6e-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_195001-201412.nc\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 10000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.03 min: 6.58 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 50000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.10 min: 8.56 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 90000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.18 min: 8.55 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 130000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.27 min: 8.00 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 160000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.34 min: 7.91 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 200000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.45 min: 7.45 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 220000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.52 min: 7.06 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 240000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.60 min: 6.67 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 260000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.67 min: 6.47 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 290000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.76 min: 6.32 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 330000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.86 min: 6.41 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 360000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 0.94 min: 6.40 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 390000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.02 min: 6.37 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 420000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.11 min: 6.33 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 450000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.19 min: 6.30 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 480000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.27 min: 6.31 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 510000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.34 min: 6.32 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 540000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.45 min: 6.22 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 560000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.51 min: 6.17 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 590000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.60 min: 6.14 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 620000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.69 min: 6.13 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 650000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.77 min: 6.12 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 680000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.86 min: 6.08 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 690000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 1.92 min: 5.99 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 710000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.03 min: 5.82 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 720000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.09 min: 5.75 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 740000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.17 min: 5.68 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems total bytes copied: 770000000\n", | |
"pangeo_forge_recipes.storage - DEBUG - avg throughput over 2.25 min: 5.70 MB/sec\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - _copy_btw_filesystems done\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Opening input with Xarray Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)}): 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc'\n", | |
"pangeo_forge_recipes.storage - INFO - Opening 'http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/SImon/siconc/gn/v20180701/siconc_SImon_GFDL-CM4_historical_r1i1p1f1_gn_195001-201412.nc' from cache\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener entering first context for <contextlib._GeneratorContextManager object at 0x7f6fc4792fa0>\n", | |
"pangeo_forge_recipes.storage - DEBUG - entering fs.open context manager for /tmp/tmp82vlep42/DLYZdCPx/590bf49ef71cc09fcfbaf03224723d6e-http_aims3.llnl.gov_thredds_fileserver_css03_data_cmip6_cmip_noaa-gfdl_gfdl-cm4_historical_r1i1p1f1_simon_siconc_gn_v20180701_siconc_simon_gfdl-cm4_historical_r1i1p1f1_gn_195001-201412.nc\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielding <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener entering second context for <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - about to enter xr.open_dataset context on <fsspec.implementations.local.LocalFileOpener object at 0x7f6fc3f19370>\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - successfully opened dataset\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - DEBUG - <xarray.Dataset>\n", | |
"Dimensions: (bnds: 2, time: 780, y: 1080, x: 1440, xTe: 1441, yTe: 1081,\n", | |
" vertex: 4)\n", | |
"Coordinates:\n", | |
" * bnds (bnds) float64 1.0 2.0\n", | |
" * time (time) object 1950-01-16 12:00:00 ... 2014-12-16 12:00:00\n", | |
" * x (x) float64 -299.7 -299.5 -299.2 -299.0 ... 59.53 59.78 60.03\n", | |
" * xTe (xTe) float64 -299.8 -299.6 -299.3 -299.1 ... 59.66 59.91 60.16\n", | |
" * y (y) float64 -80.39 -80.31 -80.23 -80.15 ... 89.73 89.84 89.95\n", | |
" * yTe (yTe) float64 -80.43 -80.35 -80.27 -80.19 ... 89.78 89.89 90.0\n", | |
" lon (y, x) float32 ...\n", | |
" lat (y, x) float32 ...\n", | |
"Dimensions without coordinates: vertex\n", | |
"Data variables:\n", | |
" siconc (time, y, x) float32 ...\n", | |
" time_bnds (time, bnds) object ...\n", | |
" lat_bnds (y, x, vertex) float32 ...\n", | |
" lon_bnds (y, x, vertex) float32 ...\n", | |
"Attributes: (12/46)\n", | |
" history: File was processed by fremetar (GFDL analog of CM...\n", | |
" table_id: SImon\n", | |
" activity_id: CMIP\n", | |
" branch_method: standard\n", | |
" branch_time_in_child: 0.0\n", | |
" comment: <null ref>\n", | |
" ... ...\n", | |
" variable_id: siconc\n", | |
" variant_info: N/A\n", | |
" references: see further_info_url attribute\n", | |
" variant_label: r1i1p1f1\n", | |
" branch_time_in_parent: 36500.0\n", | |
" parent_time_units: days since 0001-1-1\n", | |
"pangeo_forge_recipes.recipes.xarray_zarr - INFO - Caching metadata for input 'Index({DimIndex(name='time', index=1, sequence_len=2, operation=<CombineOp.CONCAT: 2>)})'\n", | |
"pangeo_forge_recipes.storage - DEBUG - file_opener yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - FSSpecTarget.open yielded\n", | |
"pangeo_forge_recipes.storage - DEBUG - opener done\n" | |
] | |
} | |
], | |
"source": [ | |
"# Manually cache inputs\n", | |
"for input_name in recipe.iter_inputs():\n", | |
" recipe.cache_input(input_name)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9729b9cb-afaf-47ff-9015-d074bbda7aff", | |
"metadata": {}, | |
"source": [ | |
"## Dynamically determine `subset_inputs`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "fbe5fcfa-8d2b-4ee6-9d08-2526c629836a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"801.356821\n", | |
"2.0\n", | |
"1249.376851\n", | |
"3.0\n" | |
] | |
} | |
], | |
"source": [ | |
"# iterate through cached files and check size\n", | |
"# approach 1\n", | |
"# using getsize function os.path module\n", | |
"import os\n", | |
"import pathlib\n", | |
"cache_path = pathlib.Path(recipe.storage_config.cache.root_path)\n", | |
"\n", | |
"subset_inputs_list = []\n", | |
"for file in cache_path.iterdir():\n", | |
" file_size = os.path.getsize(file)\n", | |
" subset_inputs = file_size//5e8 + 1 #Do we have to check if this is more than the elements in the concat_dim? or is that done elsewhere in the code?\n", | |
" subset_inputs_list.append(subset_inputs)\n", | |
" print(file_size/1e6)\n", | |
" print(subset_inputs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "5ba6804d-b3d2-40b5-9599-ae39f9efe018", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3.0" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Now set a global parameter on the object\n", | |
"recipe.subset_inputs = max(subset_inputs_list)\n", | |
"recipe.subset_inputs" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1d655624-9129-4d9e-9d4e-93162abb2e71", | |
"metadata": {}, | |
"source": [ | |
"## Dynamically determine `target_chunks`\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"id": "690c0ec0-81c7-464b-ae2d-e8ef9dab85a7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xarray as xr\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"id": "8957dc6c-042b-42a0-8af1-b88d32eec9b6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'time': 24}" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"target_chunk_size_range = (100e6, 200e6) # in byte (Maybe this ultimately can accept \"nnnMB\"/\"nnGB\" etc \n", | |
"\n", | |
"example_file = list(cache_path.iterdir())[0]\n", | |
"ds = xr.open_dataset(example_file) # Perhaps there is a way to get the info below without using xarray?\n", | |
"bytes_per_dim_element = ds.nbytes / len(ds[recipe.concat_dim])\n", | |
"chunksize = np.round(np.mean(np.array(target_chunk_size_range) / bytes_per_dim_element)) # I guess this could be more sophisticated, but for now should be good\n", | |
"\n", | |
"# set the target_chunks\n", | |
"target_chunks = {recipe.concat_dim: int(chunksize)}\n", | |
"recipe.target_chunks = target_chunks\n", | |
"recipe.target_chunks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "4cfe2bcc-e1ae-4119-9fd1-7721f7b90e6b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment