Created
July 27, 2023 13:38
-
-
Save rsignell-usgs/e98225aee5c83ae4710dcb965b083d02 to your computer and use it in GitHub Desktop.
conus404_object_storage_benchmark3.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "c8d52dc5-be86-44ef-a4de-65289808d646", | |
"metadata": {}, | |
"source": [ | |
"# Compare data load times for CONUS404\n", | |
"Computes the mean over one year of hourly CONUS404 data using 60 CPU threads (30 Dask workers with 2 threads each). \n", | |
"## Test case #3: Compute in AWS us-west-2, reading from three storage locations \n", | |
"* AWS S3 in us-west-2\n", | |
"* OSN pod design #1 at RENCI\n", | |
"* OSN pod design #2 at MGHPCC\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "547b851f-3dd9-4b9f-988b-54c743a43bbe", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import fsspec\n", | |
"import xarray as xr\n", | |
"import intake\n", | |
"from time import sleep" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "bceea39d-78e2-4480-aaf3-0ead09b7e09a", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
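"## The HyTEST helper notebooks below are left disabled because this notebook\n", | |
"## starts its Dask cluster with Coiled in a later cell. If you are not using Coiled,\n", | |
"## replace that cell with a helper appropriate to your compute environment\n", | |
"## (check that it provides the `client` and `cluster` names used by later cells). Examples:\n", | |
"# %run /shared/users/environment_set_up/Start_Dask_Cluster_Nebari.ipynb\n", | |
"# %run ../environment_set_up/Start_Dask_Cluster_Denali.ipynb\n", | |
"# %run ../environment_set_up/Start_Dask_Cluster_Tallgrass.ipynb" | |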
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "ab4872ce-ad63-494f-8aa6-e0ffbf04a12e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"n_workers = 30\n", | |
"nthreads = 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "1e82ecf1-363b-42d3-a769-4587938f539c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">╭───────────────────────────────────────── <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Coiled Cluster</span> ─────────────────────────────────────────╮\n", | |
"│ <a href=\"https://cloud.coiled.io/clusters/248592?account=dask\" target=\"_blank\">https://cloud.coiled.io/clusters/248592?account=dask</a> │\n", | |
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", | |
"╭─────────────────── Overview ───────────────────╮╭──────────────── Configuration ─────────────────╮\n", | |
"│ ││ │\n", | |
"│ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Cluster Name:</span> dask-d573f50a-0 ││ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Region:</span> us-west-2 │\n", | |
"│ ││ │\n", | |
"│ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Scheduler Status:</span> started ││ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Scheduler Instance Type:</span> m7g.xlarge │\n", | |
"│ ││ │\n", | |
"│ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Dashboard Address:</span> ││ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Worker Instance Type(s):</span> t4g.xlarge (30) │\n", | |
"│ <a href=\"https://cluster-kvuri.dask.host:8787?token=85SdIdrRtKAbqGBp\" target=\"_blank\">https://cluster-kvuri.dask.host:8787?token=85S</a> ││ │\n", | |
"│ <a href=\"https://cluster-kvuri.dask.host:8787?token=85SdIdrRtKAbqGBp\" target=\"_blank\">dIdrRtKAbqGBp</a> ││ <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Workers Requested:</span> 30 │\n", | |
"│ ││ │\n", | |
"╰────────────────────────────────────────────────╯╰────────────────────────────────────────────────╯\n", | |
"╭─────────────────────────────────── (2023/07/27 13:24:45 UTC) ────────────────────────────────────╮\n", | |
"│ │\n", | |
"│ All workers ready. │\n", | |
"│ │\n", | |
"│ │\n", | |
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", | |
"\n", | |
"\n", | |
"\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"╭───────────────────────────────────────── \u001b[1;51;32mCoiled Cluster\u001b[0m ─────────────────────────────────────────╮\n", | |
"│ \u001b]8;id=667257;https://cloud.coiled.io/clusters/248592?account=dask\u001b\\https://cloud.coiled.io/clusters/248592?account=dask\u001b]8;;\u001b\\ │\n", | |
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", | |
"╭─────────────────── Overview ───────────────────╮╭──────────────── Configuration ─────────────────╮\n", | |
"│ ││ │\n", | |
"│ \u001b[1;32mCluster Name:\u001b[0m dask-d573f50a-0 ││ \u001b[1;32mRegion:\u001b[0m us-west-2 │\n", | |
"│ ││ │\n", | |
"│ \u001b[1;32mScheduler Status:\u001b[0m started ││ \u001b[1;32mScheduler Instance Type:\u001b[0m m7g.xlarge │\n", | |
"│ ││ │\n", | |
"│ \u001b[1;32mDashboard Address:\u001b[0m ││ \u001b[1;32mWorker Instance Type(s):\u001b[0m t4g.xlarge (30) │\n", | |
"│ \u001b]8;id=430169;https://cluster-kvuri.dask.host:8787?token=85SdIdrRtKAbqGBp\u001b\\https://cluster-kvuri.dask.host:8787?token=85S\u001b]8;;\u001b\\ ││ │\n", | |
"│ \u001b]8;id=430169;https://cluster-kvuri.dask.host:8787?token=85SdIdrRtKAbqGBp\u001b\\dIdrRtKAbqGBp\u001b]8;;\u001b\\ ││ \u001b[1;32mWorkers Requested:\u001b[0m 30 │\n", | |
"│ ││ │\n", | |
"╰────────────────────────────────────────────────╯╰────────────────────────────────────────────────╯\n", | |
"╭─────────────────────────────────── (2023/07/27 13:24:45 UTC) ────────────────────────────────────╮\n", | |
"│ │\n", | |
"│ All workers ready. │\n", | |
"│ │\n", | |
"│ │\n", | |
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" | |
], | |
"text/plain": [] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import coiled\n", | |
"\n", | |
"cluster = coiled.Cluster(\n", | |
" region=\"us-west-2\",\n", | |
" compute_purchase_option=\"spot_with_fallback\",\n", | |
" arm=True,\n", | |
" scheduler_port=443,\n", | |
" wait_for_workers=True,\n", | |
" n_workers=n_workers,\n", | |
" worker_options=dict(nthreads=nthreads),\n", | |
" account='dask'\n", | |
")\n", | |
"\n", | |
"client = cluster.get_client()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "067d1b7d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['conus404-catalog',\n", | |
" 'conus404-drb-eval-tutorial-catalog',\n", | |
" 'nhm-v1.0-daymet-catalog',\n", | |
" 'nhm-v1.1-c404-bc-catalog',\n", | |
" 'nhm-v1.1-gridmet-catalog',\n", | |
" 'nwis-streamflow-usgs-gages-onprem',\n", | |
" 'nwis-streamflow-usgs-gages-cloud',\n", | |
" 'nwm21-streamflow-usgs-gages-onprem',\n", | |
" 'nwm21-streamflow-usgs-gages-cloud',\n", | |
" 'nwm21-streamflow-cloud',\n", | |
" 'nwm21-scores',\n", | |
" 'lcmap-cloud',\n", | |
" 'rechunking-tutorial-cloud']" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# open the hytest data intake catalog\n", | |
"hytest_cat = intake.open_catalog(\n", | |
" r\"https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml\"\n", | |
")\n", | |
"list(hytest_cat)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "25db9324", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['conus404-hourly-onprem',\n", | |
" 'conus404-hourly-cloud',\n", | |
" 'conus404-hourly-osn',\n", | |
" 'conus404-hourly-osn2',\n", | |
" 'conus404-daily-diagnostic-onprem',\n", | |
" 'conus404-daily-diagnostic-cloud',\n", | |
" 'conus404-daily-diagnostic-osn',\n", | |
" 'conus404-daily-onprem',\n", | |
" 'conus404-daily-cloud',\n", | |
" 'conus404-daily-osn',\n", | |
" 'conus404-daily-osn2',\n", | |
" 'conus404-monthly-onprem',\n", | |
" 'conus404-monthly-cloud',\n", | |
" 'conus404-monthly-osn']" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# open the conus404 sub-catalog\n", | |
"cat = hytest_cat['conus404-catalog']\n", | |
"list(cat)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "20a89fa0-740d-4388-ac1c-a3f0b450f4dc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"year = '1990'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7ed7aeac-90fb-471b-9f25-5dcbc6717659", | |
"metadata": {}, | |
"source": [ | |
"#### AWS S3 storage in us-west-2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "5a0d6b25-2803-46c5-b15d-6343243bfe0a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"s3://nhgf-development/conus404/conus404_hourly_202209.zarr\n", | |
"9.89 s ± 1.01 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"dataset = 'conus404-hourly-cloud'\n", | |
"print(cat[dataset].urlpath)\n", | |
"\n", | |
"ds = cat[dataset].to_dask()\n", | |
"%timeit da = ds.PREC_ACC_NC.sel(time=year).mean(dim='time').compute()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7f9b8153-b7c7-4191-9842-07a55fb7ed7f", | |
"metadata": {}, | |
"source": [ | |
"#### OSN storage pod design #1 at RENCI " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "d3b2ce8a-6499-4413-861c-65d48f7af108", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"https://renc.osn.xsede.org\n", | |
"s3://rsignellbucket2/hytest/conus404/conus404_hourly_202302.zarr\n", | |
"46.1 s ± 3.15 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"dataset = 'conus404-hourly-osn' \n", | |
"print(cat[dataset].storage_options['client_kwargs']['endpoint_url'])\n", | |
"print(cat[dataset].urlpath)\n", | |
"\n", | |
"ds = cat[dataset].to_dask()\n", | |
"%timeit da = ds.PREC_ACC_NC.sel(time=year).mean(dim='time').compute()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0f7ba745-a308-4d9c-a41b-86d7b111b00b", | |
"metadata": {}, | |
"source": [ | |
"#### OSN storage pod design #2 at MGHPCC" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "18144fef-179a-4805-93a6-baf559f930b5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"https://usgs.osn.mghpcc.org\n", | |
"s3://usgspod-testbucket/hytest/conus404/conus404_hourly_202302.zarr\n", | |
"23.8 s ± 319 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"dataset = 'conus404-hourly-osn2' \n", | |
"print(cat[dataset].storage_options['client_kwargs']['endpoint_url'])\n", | |
"print(cat[dataset].urlpath)\n", | |
"\n", | |
"ds = cat[dataset].to_dask()\n", | |
"%timeit da = ds.PREC_ACC_NC.sel(time=year).mean(dim='time').compute()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9230a094-4dcd-4552-852e-be62cb29e15d", | |
"metadata": {}, | |
"source": [ | |
"## Stop cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "c3aff08b-975d-47e1-ac51-6bb8e8b1adbc", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"client.close()\n", | |
"sleep(5)\n", | |
"cluster.shutdown()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bea02d75-1fce-44f4-8bc6-f98fe132088a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "nwm:Python", | |
"language": "python", | |
"name": "conda-env-nwm-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.4" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment