Last active
November 15, 2017 21:32
-
-
Save grovduck/88e8d28411115e8bf8a3f096b312c45a to your computer and use it in GitHub Desktop.
[Rasterio chunks based on RAM] Script to return window address based on user-defined allowable RAM usage. This allows the user to do fewer reads on rasterio data. #rasterio #numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script returns window addresses based on user-defined maximum RAM | |
usage. It is similar to rasterio.block_windows but allows the user to | |
potentially do fewer reads on rasterio data. | |
""" | |
import rasterio | |
import numpy as np | |
def chunk_windows(r, indexes=None, max_ram=250000000): | |
""" | |
Determine windows for reading a rasterio dataset based on a | |
user-specified RAM limit | |
Parameters | |
---------- | |
r : rasterio.DatasetReader | |
The raster used to determine chunk size | |
indexes : int or sequence | |
The subset of indexes (bands) to use to determine chunk size. | |
Defaults to None, which uses all bands in the raster | |
max_ram : int | |
The maximum RAM to allocate per chunk. Defaults to ~250MB. | |
Returns | |
------- | |
A generator with the chunk index and chunk window address as a tuple. | |
This is the same structure that is returned by rasterio.block_windows | |
""" | |
# Get the indexes (bands) to use - these are 1-indexed (instead of 0) | |
if indexes is None: | |
indexes = r.indexes | |
elif isinstance(indexes, int): | |
indexes = [indexes] | |
if not indexes: | |
raise ValueError('No indexes to read') | |
# Calculate the size of a pixel across all bands | |
pixel_size = 0 | |
for bidx in indexes: | |
if bidx not in r.indexes: | |
raise IndexError('band index out of range') | |
idx = r.indexes.index(bidx) | |
pixel_size += np.dtype(r.dtypes[idx]).itemsize | |
# Calculate how many pixels to collect per chunk | |
chunk_size, _ = divmod(max_ram, pixel_size) | |
# Short circuit the case where the entire raster fits in one chunk | |
r_h, r_w = r.height, r.width | |
if chunk_size >= r_h * r_w: | |
yield (0, 0), ((0, r_h), (0, r_w)) | |
# Otherwise calculate how many "block rows" (block height * raster width) | |
# will fit in one chunk (chunk_height) | |
else: | |
b_h, b_w = r.block_shapes[0] | |
d, _ = divmod(chunk_size, r_w * b_h) | |
chunk_height = d * b_h | |
# Calculate the number of chunks and return the chunk windows | |
d, m = divmod(r_h, chunk_height) | |
n_chunks = d + int(m>0) | |
for i in range(n_chunks): | |
row = i * chunk_height | |
height = min(chunk_height, r_h - row) | |
yield (i, 0), ((row, row+chunk_height), (0, r_w)) | |
if __name__ == '__main__': | |
import sys | |
# Raster to read | |
fn = sys.argv[1] | |
# Maximum RAM to use per chunk | |
max_ram = int(sys.argv[2]) | |
# Get chunks (full rows) and print the window and maximum value | |
with rasterio.open(fn) as src: | |
for _, window in chunk_windows(src, max_ram=max_ram): | |
print 'Window =', window, | |
arr = src.read(1, window=window) | |
print 'Max value =', np.max(arr) | |
del arr |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment