Skip to content

Instantly share code, notes, and snippets.

@rmg55
Last active July 26, 2022 10:21
Show Gist options
  • Save rmg55/875a2b79ee695007a78ae615f1c916b2 to your computer and use it in GitHub Desktop.
Save rmg55/875a2b79ee695007a78ae615f1c916b2 to your computer and use it in GitHub Desktop.
Wrapping Raster to Xarray+Dask
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import xarray as xr\n",
"xr.set_options(display_style='text')\n",
"import rasterio\n",
"from rasterio import crs, MemoryFile\n",
"from rasterio.vrt import WarpedVRT\n",
"from rasterio.shutil import copy as rio_copy\n",
"import rioxarray\n",
"from dask.distributed import LocalCluster,Client,performance_report,wait"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n"
]
},
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:54562</li>\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a></li>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>6</li>\n",
" <li><b>Cores: </b>12</li>\n",
" <li><b>Memory: </b>34.10 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:54562' processes=6 threads=12, memory=34.10 GB>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clust = LocalCluster(n_workers=6,threads_per_worker=2,dashboard_address=':8787')\n",
"cl = Client(clust)\n",
"cl"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Source CRS:EPSG:32613\n",
"Destination CRS:EPSG:4326\n"
]
},
{
"data": {
"text/html": [
"<pre>&lt;xarray.Dataset&gt;\n",
"Dimensions: (band: 1, x: 4194, y: 3081)\n",
"Coordinates:\n",
" * band (band) int64 1\n",
" * y (y) float64 43.35 43.35 43.35 43.35 ... 42.36 42.36 42.36 42.36\n",
" * x (x) float64 -106.2 -106.2 -106.2 ... -104.9 -104.9 -104.9\n",
" spatial_ref int64 0\n",
"Data variables:\n",
" HLS_Red (band, y, x) int16 dask.array&lt;chunksize=(1, 1500, 1500), meta=np.ndarray&gt;</pre>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (band: 1, x: 4194, y: 3081)\n",
"Coordinates:\n",
" * band (band) int64 1\n",
" * y (y) float64 43.35 43.35 43.35 43.35 ... 42.36 42.36 42.36 42.36\n",
" * x (x) float64 -106.2 -106.2 -106.2 ... -104.9 -104.9 -104.9\n",
" spatial_ref int64 0\n",
"Data variables:\n",
" HLS_Red (band, y, x) int16 dask.array<chunksize=(1, 1500, 1500), meta=np.ndarray>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = 'https://hlssa.blob.core.windows.net/hls/L309/HLS.L30.T13TDH.2019165.v1.4_04.tif'\n",
"epsg_to = 4326\n",
"with rasterio.open(f) as src:\n",
" print('Source CRS:' +str(src.crs))\n",
" with WarpedVRT(src,resampling=1,src_crs=src.crs,crs=crs.CRS.from_epsg(epsg_to),warp_mem_limit=12000,warp_extras={'NUM_THREADS':2}) as vrt:\n",
" print('Destination CRS:' +str(vrt.crs))\n",
" ds = rioxarray.open_rasterio(vrt).chunk({'x':1500,'y':1500,'band':1}).to_dataset(name='HLS_Red')\n",
"ds"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
"distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n"
]
},
{
"data": {
"text/html": [
"<pre>&lt;xarray.Dataset&gt;\n",
"Dimensions: (band: 1, x: 4194, y: 3081)\n",
"Coordinates:\n",
" * band (band) int64 1\n",
" * y (y) float64 43.35 43.35 43.35 43.35 ... 42.36 42.36 42.36 42.36\n",
" * x (x) float64 -106.2 -106.2 -106.2 ... -104.9 -104.9 -104.9\n",
" spatial_ref int64 0\n",
"Data variables:\n",
" HLS_Red (band, y, x) int16 dask.array&lt;chunksize=(1, 1500, 1500), meta=np.ndarray&gt;</pre>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (band: 1, x: 4194, y: 3081)\n",
"Coordinates:\n",
" * band (band) int64 1\n",
" * y (y) float64 43.35 43.35 43.35 43.35 ... 42.36 42.36 42.36 42.36\n",
" * x (x) float64 -106.2 -106.2 -106.2 ... -104.9 -104.9 -104.9\n",
" spatial_ref int64 0\n",
"Data variables:\n",
" HLS_Red (band, y, x) int16 dask.array<chunksize=(1, 1500, 1500), meta=np.ndarray>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with performance_report(filename='xr_wrap.html'):\n",
" ds = ds.persist()\n",
" wait(ds)\n",
"ds"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:py_geo]",
"language": "python",
"name": "conda-env-py_geo-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment