Skip to content

Instantly share code, notes, and snippets.

@phargogh
Created April 14, 2021 04:30
Show Gist options
  • Save phargogh/651934f1eb4f36f07ec6c71598bec769 to your computer and use it in GitHub Desktop.
Save phargogh/651934f1eb4f36f07ec6c71598bec769 to your computer and use it in GitHub Desktop.
Comparing two approaches to raster averaging, based off the task/stormwater branch, rev b8974c7286d5ebaf68f4596ffb264132e5b5a04e
import time
import os
from natcap.invest import stormwater
import pygeoprocessing
from osgeo import gdal, osr
import numpy
import numpy.random
_PROJECTION_SRS = osr.SpatialReference()
_PROJECTION_SRS.ImportFromEPSG(26910) # UTM Zone 10N
PROJECTION_WKT = _PROJECTION_SRS.ExportToWkt()
# taken from https://spatialreference.org/ref/epsg/26910/
ORIGIN = (224215.8977, 3810589.9220)
RADIUS = 5 # pixels
DEMO_WORKSPACE = 'average_comparison'
if not os.path.exists(DEMO_WORKSPACE):
os.mkdir(DEMO_WORKSPACE)
# Create a reasonably-sized raster on disk to fill with random ints.
RANDOM_RASTER_PATH = os.path.join(DEMO_WORKSPACE, 'random_ints.tif')
DIM = 10000 # x, y dimensions of raster
PIXEL_SIZE = 20
ARRAY = numpy.random.randint(
low=0, high=255, size=DIM*DIM, dtype=numpy.uint16).reshape(
(DIM, DIM)) # .astype(numpy.float32) # float32 type appears needed for sw
# ARRAY = numpy.array([
# [246, 198, 115],
# [160, 166, 85],
# [51, 32, 125]], dtype=numpy.float32)
#ARRAY = numpy.array([[246, 198]], dtype=numpy.int32)
EXPECTED_AVERAGE = ARRAY.sum() / ARRAY.size
print(ARRAY)
pygeoprocessing.numpy_array_to_raster(
ARRAY, 255, (PIXEL_SIZE, -PIXEL_SIZE), ORIGIN, PROJECTION_WKT,
RANDOM_RASTER_PATH)
# use stormwater average function
stormwater_start_time = time.time()
KERNEL = stormwater.make_search_kernel(RANDOM_RASTER_PATH, RADIUS*PIXEL_SIZE)
SW_TARGET_PATH = os.path.join(DEMO_WORKSPACE, 'stormwater_average.tif')
stormwater.raster_average(
RANDOM_RASTER_PATH,
KERNEL,
os.path.join(DEMO_WORKSPACE, 'stormwater_n_values.tif'),
os.path.join(DEMO_WORKSPACE, 'stormwater_sum.tif'),
SW_TARGET_PATH)
print(f'Stormwater raster_averge took {time.time() - stormwater_start_time}s')
# numpy.testing.assert_almost_equal(
# gdal.OpenEx(SW_TARGET_PATH).ReadAsArray(), EXPECTED_AVERAGE)
def _make_kernel(kernel_filepath, pixel_radius):
kernel_size = int(pixel_radius * 2 + 1)
driver = gdal.GetDriverByName('GTiff')
kernel_dataset = driver.Create(
kernel_filepath.encode('utf-8'), kernel_size, kernel_size, 1,
gdal.GDT_Float32, options=[
'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
'BLOCKYSIZE=256'])
# Make some kind of geotransform, it doesn't matter what but
# will make GIS libraries behave better if it's all defined
kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
srs = osr.SpatialReference()
srs.SetWellKnownGeogCS('WGS84')
kernel_dataset.SetProjection(srs.ExportToWkt())
kernel_band = kernel_dataset.GetRasterBand(1)
kernel_band.SetNoDataValue(255)
col_indices, row_indices = numpy.indices((kernel_size, kernel_size))
col_indices -= pixel_radius
row_indices -= pixel_radius
hypotenuse = numpy.hypot(col_indices, row_indices)
search_kernel = numpy.array(hypotenuse <= pixel_radius,
dtype=numpy.float32)
kernel_band.WriteArray(search_kernel)
kernel_band = None
kernel_raster = None
# use convolution
pygeoprocessing_start_time = time.time()
SEARCH_KERNEL_PATH = os.path.join(
DEMO_WORKSPACE, 'pygeoprocessing_avg_kernel.tif')
_make_kernel(SEARCH_KERNEL_PATH, RADIUS)
TARGET_PATH = os.path.join(DEMO_WORKSPACE, 'pygeoprocessing_average.tif')
pygeoprocessing.convolve_2d(
(RANDOM_RASTER_PATH, 1), (SEARCH_KERNEL_PATH, 1), TARGET_PATH,
ignore_nodata_and_edges=True,
normalize_kernel=True, # this is key, allows for kernel of 0, 1.
target_datatype=gdal.GDT_Float32,
target_nodata=float(numpy.finfo(numpy.float32).min),
mask_nodata=True,
working_dir=DEMO_WORKSPACE)
print(f'Pygeoprocessing took {time.time() - pygeoprocessing_start_time}s')
# numpy.testing.assert_almost_equal(
# gdal.OpenEx(TARGET_PATH).ReadAsArray(), EXPECTED_AVERAGE)
def _compare(raster1, raster2):
sw_raster = gdal.Open(raster1)
sw_band = sw_raster.GetRasterBand(1)
array1 = sw_band.ReadAsArray()
print(array1)
if isinstance(raster2, str):
pgp_raster = gdal.Open(raster2)
pgp_band = pgp_raster.GetRasterBand(1)
array2 = pgp_band.ReadAsArray()
else:
array2 = raster2
print(array2)
numpy.testing.assert_almost_equal(
array1, array2)
_compare(SEARCH_KERNEL_PATH, KERNEL) # Assert kernels are equal
_compare(SW_TARGET_PATH, TARGET_PATH) # Assert averages match
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment