Skip to content

Instantly share code, notes, and snippets.

@Jwely
Last active October 13, 2022 17:20
Show Gist options
  • Save Jwely/92d4bdb34eca000fde82 to your computer and use it in GitHub Desktop.
Save Jwely/92d4bdb34eca000fde82 to your computer and use it in GitHub Desktop.
An arcpy-like raster math function implemented with gdal
#!/usr/bin/env python
# in the input expression
#from osgeo import gdal, gdalconst, gdalnumeric
import gdal
import gdalconst
import gdalnumeric
import numpy
import os
import math # imported so it is available to the users expression evaluation
# used to convert between gdal and numpy datatypes
NP2GDAL_CONVERSION = {
"uint8": 1,
"int8": 1,
"uint16": 2,
"int16": 3,
"uint32": 4,
"int32": 5,
"float32": 6,
"float64": 7,
"complex64": 10,
"complex128": 11,
}
GDAL2NP_CONVERSION = {v: k for k, v in NP2GDAL_CONVERSION.items()}
def rast_math(output_path, expression, *args, dtype=None, set_nodata=None, dataset_info=None):
"""
A raster math calculator that uses GDALs python bindings instead of command line
interface for simple math. The syntax of this feels more pythonic than the native gdal_calc.py
syntax. Supports up to 26 raster images for each letter of the alphabet as
arguments. Supports single band raster images or gdal.Band instances as *args inputs.
Input expression will be directly evaluated, so users can input numerical constants
and simple ``numpy`` or ``math`` module operators with lowercase function names. Because
this function uses python bindings and numpy data structures, be mindful of the memory
limitations associated with 32-bit python, highly recommend using 64 bit.
This function does require the rasters to have identical extents, which can be easily achieved
by clipping them all with the same boundary before operating on them.
:param output_path: filepath at which to store expression result. Set to "None" to just return
the numeric array.
:param expression: the mathematical expression using rasters as A,B,C, etc
:param args: Filepaths to single band rasters that represent A,B,C, etc (in order)
OR, gdal.Band instances which could be used with multi-band rasters via...
ds = gdal.Open(rastpath)
bandA = ds.GetRasterBand(1)
bandB = ds.GetRasterBand(2)
rast_math(outpath, "numpy.log(A) + 3 * B", bandA, bandB)
:param dtype: gdal data type of output raster, int8, int16, and float32 are most common.
:param set_nodata: set an aditional value in the output raster to nodata
:param dataset_info: if using all gdalRasterBand instances, must provide geo-metadata manually with
`
:return: the output path to the file created by this function
An example for the ubiquitous NDVI calculation from landsat bands:
..code-block: python
root = r"my_landsat_directory" # directory with landsat tiffs
A_path = os.path.join(root, "tile_B5.TIF") # filepath to Band5
B_path = os.path.join(root, "tile_B4.TIF") # filepath to Band4
out_path = os.path.join(root, "NDVI.TIF") # filepath of new output image
rast_math(out_path, "(A + B) / (A - B)", A_path, B_path)
An example where we want to conditionally mask one raster by another, say
image "A" is our raster with data, and image "B" is a mask where a value of 1
indicates a bad value, and zero indicates a good value.
..code-block:python
rast_math(out_path, "A * (B == 0)", A_path, B_path
"""
if dtype is None:
dtype = "Float32"
# set up the iterators and structures
datasets = {} # dictionary where actual data will go
eval_args = {} # dictionary with string literals to be evaluated as code
nodata_value = None # initial value
dataset_in = None # inital value
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[:len(args)]
# format the expression with curly brackets around letters
print("Executing expression '{0}'".format(expression))
for letter in alphabet:
expression = expression.replace(letter, "{%s}" % letter)
# create the numpy arrays from raster datasets with gdal
for arg, letter in zip(args, alphabet):
# load the band data
if isinstance(arg, str):
if not os.path.exists(arg):
raise Exception("file {0} does not exist!".format(arg))
print("\tLoading {0} as raster '{1}'".format(arg, letter))
dataset_in = gdal.Open(arg, gdalconst.GA_ReadOnly)
band = dataset_in.GetRasterBand(1)
elif isinstance(arg, gdal.Band):
band = arg
else:
raise Exception("Input must be a filepath or a gdal.Band instance")
# format the band data with extracted metadata
dtype = GDAL2NP_CONVERSION[band.DataType]
nodata_value = band.GetNoDataValue()
array = numpy.ma.array(band.ReadAsArray(), dtype="float32") # read all arrays as float32s, convert on write
array = numpy.ma.masked_equal(array, nodata_value)
datasets[letter] = array
eval_args[letter] = "datasets['{0}']".format(letter)
# assemble and evaluate the expression. the guts of the function! though it is short.
eval_expression = expression.format(**eval_args)
print(eval_expression)
out_array = eval(eval_expression)
# set any final values to nodata.
if set_nodata is not None:
out_array.data[out_array.data == set_nodata] = nodata_value
# either save the output or return an output array
if output_path:
driver = gdal.GetDriverByName("GTiff") # create the geotiff driver object
yshape, xshape = datasets["A"].shape # set dimensions of output file
num_bands = 1 # only supports single band output
dataset_out = driver.Create(output_path, xshape, yshape, num_bands, NP2GDAL_CONVERSION[dtype])
# give the new file metadata
if dataset_info is not None:
gdalnumeric.CopyDatasetInfo(dataset_info, dataset_out)
elif dataset_in is not None:
gdalnumeric.CopyDatasetInfo(dataset_in, dataset_out)
else:
raise Exception("Either 1 input must be a raster filepath (with metadata), "
"or 'dataset_info' argument must be used")
band_out = dataset_out.GetRasterBand(1)
gdalnumeric.BandWriteArray(band_out, out_array)
return os.path.abspath(output_path)
else:
return out_array
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment