bnordgren · September 13, 2018 21:57
diff --git a/lc_trends.py b/lc_trends.py
 import glob
 import pandas as pd
 import zonal_stats as z



 def trends(shpfile, years, rasterpattern, histobins=10, historange=None) :

    rasterfiles = [ glob.glob(rasterpattern.format(y))[0] for y in years ]

    if historange is None : 
        historange = z.calc_range_many_rasters(shpfile,rasterfiles)

    trend = {}

    for i_y in range(len(years)) : 
        stats = z.zonal_histogram(shpfile, rasterfiles[i_y],
                  histobins, historange) 
        trend[years[i_y]] = z.make_histogram_frame(stats)
        
    return pd.concat(trend)


 def scalar_trends(shpfile, years, rasterpattern, lc_pattern, mask_fn,
                  valid_range=None, raster_nodata=None) : 
    rasterfiles = [ glob.glob(rasterpattern.format(y))[0] for y in years ]
    landcover_files = [ glob.glob(lc_pattern.format(y))[0] for y in years ]
    trend = {} 

    for i_y in range(len(years)) : 
        stats = z.zonal_stats(shpfile, rasterfiles[i_y],
                   mask_path=landcover_files[i_y],
                   mask_fn=mask_fn,valid_range=valid_range,
                   nodata_value=raster_nodata)
        trend[years[i_y]] = z.make_stats_frame(stats)

    return pd.concat(trend) 


 def scalar_trends_bands(shpfile, years, bands, rasterfile, raster_nodata=None) : 
    """A variant for the case where time is represented by a band of raster information."""
    trend = {} 

    for i_y in range(len(years)) : 
        print years[i_y], bands[i_y]
        stats = z.zonal_stats(shpfile, rasterfile,
                   band=bands[i_y],
                   nodata_value=raster_nodata)
        trend[years[i_y]] = z.make_stats_frame(stats)

    return pd.concat(trend) 
diff --git a/zonal_stats.py b/zonal_stats.py
 """
 Zonal Statistics
 Vector-Raster Analysis

 Copyright 2013 Matthew Perry

 Usage:
  zonal_stats.py VECTOR RASTER
  zonal_stats.py -h | --help
  zonal_stats.py --version

 Options:
  -h --help     Show this screen.
  --version     Show version.
 """
 from osgeo import gdal, ogr
 from osgeo.gdalconst import *
 import warnings
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
 import sys
 import collections as c
 from functools import partial

 gdal.PushErrorHandler('CPLQuietErrorHandler')

 Offsets = c.namedtuple("Offsets", ['x0','y0','x_size','y_size'])



 def bbox_to_pixel_offsets(gt, bbox):
    originX = gt[0]
    originY = gt[3]
    pixel_width = gt[1]
    pixel_height = gt[5]

    # ensure there are no negative pixel indices
    x1 = max(0,int((bbox[0] - originX) / pixel_width))
    x2 = int((bbox[1] - originX) / pixel_width) # + 1

    # ensure there are no negative pixel indices
    y1 = max(0,int((bbox[3] - originY) / pixel_height))
    y2 = int((bbox[2] - originY) / pixel_height) # + 1

    xsize = max(1, x2 - x1)
    ysize = max(1, y2 - y1)
    return Offsets(x1, y1, xsize, ysize)

 def offset_polygon(offsets) : 
    x0 = offsets.x0
    y0 = offsets.y0
    x1 = x0 + offsets.x_size
    y1 = y0 + offsets.y_size
    ring = ogr.Geometry(ogr.wkbLinearRing) 
    ring.AddPoint(x0,y0)
    ring.AddPoint(x1,y0)
    ring.AddPoint(x1,y1)
    ring.AddPoint(x0,y1)
    ring.AddPoint(x0,y0)

    poly = ogr.Geometry(ogr.wkbPolygon)
    poly.AddGeometry(ring)
    return poly

 def clip_offsets_to_raster(offsets, raster) : 
    rast_offsets = Offsets(0,0,raster.RasterXSize, raster.RasterYSize)


    o_env = offset_polygon(offsets) 
    r_env = offset_polygon(rast_offsets)

    clipped_offsets = None
    clipped_geom = o_env.Intersection(r_env)
    if clipped_geom is not None : 
        clipped_area = clipped_geom.GetEnvelope()
        x1 = int(clipped_area[0])
        x2 = int(clipped_area[1])
        y1 = int(clipped_area[2])
        y2 = int(clipped_area[3])
        x_size = max(1, x2-x1)
        y_size = max(1, y2-y1)

        if x_size > 1 and y_size > 1 : 
            clipped_offsets = Offsets(x1,y1,x_size,y_size)

    return clipped_offsets
    
 def calc_histobins(histobins, historange) :
    
    # if histobins is the "int" type, that means to make integer-sized bins 
    # to cover the specified range.
    if histobins == int : 
        min_left = int(np.floor(historange[0]))
        max_right = int(np.ceil(historange[1]))
        histobins = np.linspace(min_left,max_right+1,num=max_right-min_left+2)
    
    # otherwise we just return what the user passed in.
    return histobins                       
        
 def calc_stats(masked) : 
    feature_stats = {
        'min': float(masked.min()),
        'mean': float(masked.mean()),
        'max': float(masked.max()),
        'std': float(masked.std()),
        'sum': float(masked.sum()),
        'count': int(masked.count())}
        
    return feature_stats
    
 def calc_histogram(histobins, historange, masked) : 
    feature_stats = {
        'histogram' : np.histogram(masked.compressed(), bins=histobins,
                                    range=historange)}
    return feature_stats

 def mask_nodata_and_rv(src_array, rv_array, nodata_value, mask_array) :
    """Masks any pixels where the src_array is the nodata value, or outside of the 
    rasterized vector.
     + src_array: the raster data
     + rv_array : boolean raster array converted from shapefile False=outside vector
     + nodata_value: nodata value to be applied to the array
     + mask_array: ignored"""
    return np.ma.MaskedArray(
            src_array,
            mask=np.logical_or(
                src_array == nodata_value,
                np.logical_not(rv_array)
            )
        )    

 def mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, low, high) :
    """Masks pixels where:
       + src_array is the nodata value
       + pixel outside the rasterized vector
       + mask_array pixel value is outside the closed interval [low, high].
    """
    return np.ma.MaskedArray(
             src_array,
             mask=np.logical_or(
                    np.logical_or(
                      np.logical_not(rv_array),
                      src_array == nodata_value),
                    np.logical_or(
                      mask_array < low,
                      mask_array > high)))

 def select_forest(src_array, rv_array, nodata_value, mask_array) : 
    """Assuming mask_array is a MODIS landcover raster, selects only forest types"""
    return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 1,5)
    
 def select_non_forest(src_array, rv_array, nodata_value, mask_array) : 
    """Assuming mask_array is a MODIS landcover raster, selects grassland/savannah types"""
    return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 6, 10)                      

 def select_non_ag(src_array, rv_array, nodata_value, mask_array) : 
    """Assuming mask_array is a MODIS landcover raster, selects forest and grassland/savannah types"""
    return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 1, 10)

 def zonal_stats(vector_path, raster_path, mask_path=None, nodata_value=None, 
                 global_src_extent=False, only_global=False,
                 calc_fn=calc_stats, mask_fn=mask_nodata_and_rv,
                 valid_range=None, band=1):
    """computes zonal statistics for the raster over the vector.
    Allows user to optionally specify a mask layer, an external mask calculation
    function, nodata value, whether to perform the stats globally, and whether 
    to load the raster data just once, globally, or once for each feature in the 
    vector file.
    
    If the user specifies a mask file, it must be the same dimensions, projection, and 
    coverage as the raster file."""
    warnings.simplefilter("error", UserWarning)
    rds = gdal.Open(raster_path, GA_ReadOnly)
    assert(rds)
    rb = rds.GetRasterBand(band)
    rgt = rds.GetGeoTransform()
    
    mask_array = None
    if mask_path is not None : 
        mds = gdal.Open(mask_path, GA_ReadOnly)
        assert(mds)
        mb = mds.GetRasterBand(1)
        mgt = mds.GetGeoTransform()
        if not np.allclose(mgt,rgt) :
            raise ValueError("Raster and Mask must have same resolution and coverage.")
        rproj=rds.GetProjection()
        mproj=mds.GetProjection()
        if rproj != mproj : 
            raise ValueError("Raster and Mask must have same projection.")
        

    if nodata_value:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)

    vds = ogr.Open(vector_path, GA_ReadOnly)  # TODO maybe open update if we want to write stats
    assert(vds)
    vlyr = vds.GetLayer(0)


    # create an in-memory numpy array of the source raster data
    # covering the whole extent of the vector layer
    if global_src_extent or only_global:
        # find "global min/max" to calculate common bins for all histograms
        src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
        src_offset = clip_offsets_to_raster(src_offset, rds)
        if src_offset is None: 
            raise ValueError("raster and vector do not intersect")
        src_array = rb.ReadAsArray(*src_offset)
        src_array = ma.masked_values(src_array, rb.GetNoDataValue())
        if valid_range is not None: 
            src_array = ma.masked_outside(src_array,valid_range[0],valid_range[1])
        if mask_path is not None : 
            mask_array = mb.ReadAsArray(*src_offset)
        

        # use global source extent
        # useful only when disk IO or raster scanning inefficiencies are your limiting factor
        # advantage: reads raster data in one pass
        # disadvantage: large vector extents may have big memory requirements

        # calculate new geotransform of the layer subset
        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )
        
        # if asked, compute the stats just for the contents of the bounding box
        if only_global : 
            masked = ma.masked_values(src_array, rb.GetNoDataValue())
            feature_stats = calc_fn(masked)
            return [ feature_stats ] 


    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Loop through vectors
    stats = []
    feat = vlyr.GetNextFeature()
    while feat is not None:
        if not global_src_extent:
            # use local source extent
            # fastest option when you have fast disks and well indexed raster (ie tiled Geotiff)
            # advantage: each feature uses the smallest raster chunk
            # disadvantage: lots of reads on the source raster
            src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope())
            src_offset = clip_offsets_to_raster(src_offset,rds)
            # what follows should read as 
            # if src_offset is None : 
            #   feat=vlyr.GetNextFeature() 
            #   continue
            #
            # Unfortunately, ogr has a bug that makes the while 
            # loop spin infinitely if a continue statement is used. 
            if src_offset is not None : 
                src_array = rb.ReadAsArray(*src_offset)
                src_array = ma.masked_values(src_array, rb.GetNoDataValue())
                if valid_range is not None: 
                    src_array = ma.masked_outside(src_array,valid_range[0],valid_range[1])
                if mask_path is not None : 
                    mask_array = mb.ReadAsArray(*src_offset)
    
                # calculate new geotransform of the feature subset
                new_gt = (
                    (rgt[0] + (src_offset[0] * rgt[1])),
                    rgt[1],
                    0.0,
                    (rgt[3] + (src_offset[1] * rgt[5])),
                    0.0,
                    rgt[5]
                )

        # workaround for python/ogr bug involving continue statements
        # see above.
        if src_offset is not None: 

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
            mem_layer.CreateFeature(feat.Clone())

            # Rasterize it
            rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask effect
            # we also mask out nodata values explictly
            masked = mask_fn(src_array, rv_array, nodata_value, mask_array)
    
            # Compute stats only if there are one or more pixels remaining (unmasked)
            if masked.count() > 0 : 
                feature_stats = calc_fn(masked)
                feature_stats['fid'] = int(feat.GetFID())
                stats.append(feature_stats)

            rvds = None
            mem_ds = None

        feat = vlyr.GetNextFeature()

    vds = None
    rds = None
    if mask_path is not None  : 
        mds = None
    return stats

 def calc_range(vector_path, raster_path) : 
    stats = zonal_stats(vector_path, raster_path, only_global=True)
    return (stats[0]['min'], stats[0]['max'])

 def calc_range_many_rasters(vector_path, raster_path_array) : 
    totalrange = None
    for raster_path in raster_path_array : 
        cur_range = calc_range(vector_path, raster_path)
        if totalrange is None : 
            totalrange = list(cur_range)
        else : 
            totalrange[0] = min(totalrange[0],cur_range[0])
            totalrange[1] = max(totalrange[1],cur_range[1])
            
    return totalrange

 def zonal_histogram(vector_path, raster_path, 
                    histobins=10, historange=None) :

    # go compute the range if necessary
    if historange is None : 
        historange = calc_range(vector_path, raster_path)
        
    # does the number of bins need modifying? 
    histobins = calc_histobins(histobins, historange)
    
    #compose the stats function
    histo_fn = partial(calc_histogram, histobins, historange)
    
    # go compute the zonal histogram, using the same bins for all zones/fids
    return zonal_stats(vector_path, raster_path, calc_fn=histo_fn)

 def make_histogram_frame(stats) :
    """Processes stats from zonal histogram to return a data frame containing 
    only the histogram data. Fid + bins are columns. Each row contains data for 
    one fid."""
    
    # post process the results into a data frame where the 
    fids = [ i['fid'] for i in stats ]
    num_histobins = len(stats[0]['histogram'][0])
    histobins = [None] * num_histobins
    for i in range(num_histobins) :
        histobins[i] = [ item['histogram'][0][i] for item in stats ]

    tabledef = c.OrderedDict()
    tabledef['FID'] =  fids
    histo_edges = stats[0]['histogram'][1]
    for i in range(num_histobins) :
        tabledef['Bin_{}'.format(histo_edges[i])] = histobins[i]

    return pd.DataFrame(tabledef)

 def make_stats_frame(stats) : 
    return pd.DataFrame(stats)

 if __name__ == "__main__":
    opts = {'VECTOR': sys.argv[1], 'RASTER': sys.argv[2]}
    stats = zonal_stats(opts['VECTOR'], opts['RASTER'])

    try:
        from pandas import DataFrame
        print DataFrame(stats)
    except ImportError:
        import json
        print json.dumps(stats, indent=2)
diff --git a/zonal_stats_example.txt b/zonal_stats_example.txt
 $ time python zonal_stats.py test.shp terrain/slope.tif
    count  fid  max       mean  min        std    sum
 0     203    0   96  65.876847    3  17.968489  13373
 1     130    1   90  60.100000    3  16.728994   7813
 2    1341    2  102  53.211037    2  17.901655  71356
 3     130    3   90  60.100000    3  16.728994   7813
 4     132    4   64  15.962121    1  15.360519   2107
 5     132    5   53  31.515152   17   7.970100   4160
 6     131    6   42   9.893130    0   8.168317   1296
 7     132    7   64  28.712121    2  14.853594   3790
 8     133    8   54  35.548872   11   8.878856   4728
 9     131    9   82  52.297710    4  17.349877   6851
 10    131   10   11   3.030534    0   1.781752    397
 11    134   11   57  10.156716    1  11.960042   1361
 12    133   12   45  19.000000    0  13.727750   2527
 13    132   13   64  26.507576    1  18.848075   3499
 14    132   14   94  52.787879    1  22.297585   6968
 15    131   15   84  19.450382    1  15.992944   2548
 16    132   16   52  11.583333    0  11.538501   1529
 17    132   17  108  53.515152    6  18.198603   7064
 18    341   18   76  39.117302    9  11.540482  13339
 19    337   19   57  19.988131    4   9.593512   6736
 20    336   20   78  48.636905   11  13.357014  16342
 21    338   21    3   0.855030    0   0.527067    289
 22    337   22   34   5.347181    0   7.069888   1802
 23    341   23    0   0.000000    0   0.000000      0
 24    341   24   42  16.612903    0   9.041271   5665
 25    337   25  128  78.848665    5  18.689028  26572
 26    341   26   29   7.973607    1   5.341357   2719
 27    339   27   78  35.616519    5  14.455317  12074
 28    341   28   65  20.199413    0  16.636394   6888
 29    340   29   84  35.855882    1  17.022989  12191
 30    338   30   96  61.440828    2  16.703587  20767
 31    340   31  101  57.832353    8  18.161971  19663

 real    0m1.311s
 user    0m0.372s
 sys     0m0.752s


 #### Starspan equivalent
 $ time starspan --vector test.shp --out-prefix testout --out-type table \
 --summary-suffix _stats.csv --raster terrain/slope.tif \
 --stats avg mode median min max sum stdev nulls && \
 cat testout_stats.csv
  1: Extracting from /usr/local/apps/land_owner_tools/lot/fixtures/downloads/terrain/slope.tif
 Summary:
  Intersecting features: 32
      Polygons: 32

  Processed pixels: 8379

 real    0m1.440s
 user    0m0.944s
 sys     0m0.296s
	import glob
	import pandas as pd
	import zonal_stats as z



	def trends(shpfile, years, rasterpattern, histobins=10, historange=None) :

	rasterfiles = [ glob.glob(rasterpattern.format(y))[0] for y in years ]

	if historange is None :
	historange = z.calc_range_many_rasters(shpfile,rasterfiles)

	trend = {}

	for i_y in range(len(years)) :
	stats = z.zonal_histogram(shpfile, rasterfiles[i_y],
	histobins, historange)
	trend[years[i_y]] = z.make_histogram_frame(stats)

	return pd.concat(trend)


	def scalar_trends(shpfile, years, rasterpattern, lc_pattern, mask_fn,
	valid_range=None, raster_nodata=None) :
	rasterfiles = [ glob.glob(rasterpattern.format(y))[0] for y in years ]
	landcover_files = [ glob.glob(lc_pattern.format(y))[0] for y in years ]
	trend = {}

	for i_y in range(len(years)) :
	stats = z.zonal_stats(shpfile, rasterfiles[i_y],
	mask_path=landcover_files[i_y],
	mask_fn=mask_fn,valid_range=valid_range,
	nodata_value=raster_nodata)
	trend[years[i_y]] = z.make_stats_frame(stats)

	return pd.concat(trend)


	def scalar_trends_bands(shpfile, years, bands, rasterfile, raster_nodata=None) :
	"""A variant for the case where time is represented by a band of raster information."""
	trend = {}

	for i_y in range(len(years)) :
	print years[i_y], bands[i_y]
	stats = z.zonal_stats(shpfile, rasterfile,
	band=bands[i_y],
	nodata_value=raster_nodata)
	trend[years[i_y]] = z.make_stats_frame(stats)

	return pd.concat(trend)
	"""
	Zonal Statistics
	Vector-Raster Analysis

	Copyright 2013 Matthew Perry

	Usage:
	zonal_stats.py VECTOR RASTER
	zonal_stats.py -h \| --help
	zonal_stats.py --version

	Options:
	-h --help Show this screen.
	--version Show version.
	"""
	from osgeo import gdal, ogr
	from osgeo.gdalconst import *
	import warnings
	import numpy as np
	import numpy.ma as ma
	import pandas as pd
	import sys
	import collections as c
	from functools import partial

	gdal.PushErrorHandler('CPLQuietErrorHandler')

	Offsets = c.namedtuple("Offsets", ['x0','y0','x_size','y_size'])



	def bbox_to_pixel_offsets(gt, bbox):
	originX = gt[0]
	originY = gt[3]
	pixel_width = gt[1]
	pixel_height = gt[5]

	# ensure there are no negative pixel indices
	x1 = max(0,int((bbox[0] - originX) / pixel_width))
	x2 = int((bbox[1] - originX) / pixel_width) # + 1

	# ensure there are no negative pixel indices
	y1 = max(0,int((bbox[3] - originY) / pixel_height))
	y2 = int((bbox[2] - originY) / pixel_height) # + 1

	xsize = max(1, x2 - x1)
	ysize = max(1, y2 - y1)
	return Offsets(x1, y1, xsize, ysize)

	def offset_polygon(offsets) :
	x0 = offsets.x0
	y0 = offsets.y0
	x1 = x0 + offsets.x_size
	y1 = y0 + offsets.y_size
	ring = ogr.Geometry(ogr.wkbLinearRing)
	ring.AddPoint(x0,y0)
	ring.AddPoint(x1,y0)
	ring.AddPoint(x1,y1)
	ring.AddPoint(x0,y1)
	ring.AddPoint(x0,y0)

	poly = ogr.Geometry(ogr.wkbPolygon)
	poly.AddGeometry(ring)
	return poly

	def clip_offsets_to_raster(offsets, raster) :
	rast_offsets = Offsets(0,0,raster.RasterXSize, raster.RasterYSize)


	o_env = offset_polygon(offsets)
	r_env = offset_polygon(rast_offsets)

	clipped_offsets = None
	clipped_geom = o_env.Intersection(r_env)
	if clipped_geom is not None :
	clipped_area = clipped_geom.GetEnvelope()
	x1 = int(clipped_area[0])
	x2 = int(clipped_area[1])
	y1 = int(clipped_area[2])
	y2 = int(clipped_area[3])
	x_size = max(1, x2-x1)
	y_size = max(1, y2-y1)

	if x_size > 1 and y_size > 1 :
	clipped_offsets = Offsets(x1,y1,x_size,y_size)

	return clipped_offsets

	def calc_histobins(histobins, historange) :

	# if histobins is the "int" type, that means to make integer-sized bins
	# to cover the specified range.
	if histobins == int :
	min_left = int(np.floor(historange[0]))
	max_right = int(np.ceil(historange[1]))
	histobins = np.linspace(min_left,max_right+1,num=max_right-min_left+2)

	# otherwise we just return what the user passed in.
	return histobins

	def calc_stats(masked) :
	feature_stats = {
	'min': float(masked.min()),
	'mean': float(masked.mean()),
	'max': float(masked.max()),
	'std': float(masked.std()),
	'sum': float(masked.sum()),
	'count': int(masked.count())}

	return feature_stats

	def calc_histogram(histobins, historange, masked) :
	feature_stats = {
	'histogram' : np.histogram(masked.compressed(), bins=histobins,
	range=historange)}
	return feature_stats

	def mask_nodata_and_rv(src_array, rv_array, nodata_value, mask_array) :
	"""Masks any pixels where the src_array is the nodata value, or outside of the
	rasterized vector.
	+ src_array: the raster data
	+ rv_array : boolean raster array converted from shapefile False=outside vector
	+ nodata_value: nodata value to be applied to the array
	+ mask_array: ignored"""
	return np.ma.MaskedArray(
	src_array,
	mask=np.logical_or(
	src_array == nodata_value,
	np.logical_not(rv_array)
	)
	)

	def mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, low, high) :
	"""Masks pixels where:
	+ src_array is the nodata value
	+ pixel outside the rasterized vector
	+ mask_array pixel value is outside the closed interval [low, high].
	"""
	return np.ma.MaskedArray(
	src_array,
	mask=np.logical_or(
	np.logical_or(
	np.logical_not(rv_array),
	src_array == nodata_value),
	np.logical_or(
	mask_array < low,
	mask_array > high)))

	def select_forest(src_array, rv_array, nodata_value, mask_array) :
	"""Assuming mask_array is a MODIS landcover raster, selects only forest types"""
	return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 1,5)

	def select_non_forest(src_array, rv_array, nodata_value, mask_array) :
	"""Assuming mask_array is a MODIS landcover raster, selects grassland/savannah types"""
	return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 6, 10)

	def select_non_ag(src_array, rv_array, nodata_value, mask_array) :
	"""Assuming mask_array is a MODIS landcover raster, selects forest and grassland/savannah types"""
	return mask_nodata_range_and_rv(src_array, rv_array, nodata_value, mask_array, 1, 10)

	def zonal_stats(vector_path, raster_path, mask_path=None, nodata_value=None,
	global_src_extent=False, only_global=False,
	calc_fn=calc_stats, mask_fn=mask_nodata_and_rv,
	valid_range=None, band=1):
	"""computes zonal statistics for the raster over the vector.
	Allows user to optionally specify a mask layer, an external mask calculation
	function, nodata value, whether to perform the stats globally, and whether
	to load the raster data just once, globally, or once for each feature in the
	vector file.

	If the user specifies a mask file, it must be the same dimensions, projection, and
	coverage as the raster file."""
	warnings.simplefilter("error", UserWarning)
	rds = gdal.Open(raster_path, GA_ReadOnly)
	assert(rds)
	rb = rds.GetRasterBand(band)
	rgt = rds.GetGeoTransform()

	mask_array = None
	if mask_path is not None :
	mds = gdal.Open(mask_path, GA_ReadOnly)
	assert(mds)
	mb = mds.GetRasterBand(1)
	mgt = mds.GetGeoTransform()
	if not np.allclose(mgt,rgt) :
	raise ValueError("Raster and Mask must have same resolution and coverage.")
	rproj=rds.GetProjection()
	mproj=mds.GetProjection()
	if rproj != mproj :
	raise ValueError("Raster and Mask must have same projection.")


	if nodata_value:
	nodata_value = float(nodata_value)
	rb.SetNoDataValue(nodata_value)

	vds = ogr.Open(vector_path, GA_ReadOnly) # TODO maybe open update if we want to write stats
	assert(vds)
	vlyr = vds.GetLayer(0)


	# create an in-memory numpy array of the source raster data
	# covering the whole extent of the vector layer
	if global_src_extent or only_global:
	# find "global min/max" to calculate common bins for all histograms
	src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
	src_offset = clip_offsets_to_raster(src_offset, rds)
	if src_offset is None:
	raise ValueError("raster and vector do not intersect")
	src_array = rb.ReadAsArray(*src_offset)
	src_array = ma.masked_values(src_array, rb.GetNoDataValue())
	if valid_range is not None:
	src_array = ma.masked_outside(src_array,valid_range[0],valid_range[1])
	if mask_path is not None :
	mask_array = mb.ReadAsArray(*src_offset)


	# use global source extent
	# useful only when disk IO or raster scanning inefficiencies are your limiting factor
	# advantage: reads raster data in one pass
	# disadvantage: large vector extents may have big memory requirements

	# calculate new geotransform of the layer subset
	new_gt = (
	(rgt[0] + (src_offset[0] * rgt[1])),
	rgt[1],
	0.0,
	(rgt[3] + (src_offset[1] * rgt[5])),
	0.0,
	rgt[5]
	)

	# if asked, compute the stats just for the contents of the bounding box
	if only_global :
	masked = ma.masked_values(src_array, rb.GetNoDataValue())
	feature_stats = calc_fn(masked)
	return [ feature_stats ]


	mem_drv = ogr.GetDriverByName('Memory')
	driver = gdal.GetDriverByName('MEM')

	# Loop through vectors
	stats = []
	feat = vlyr.GetNextFeature()
	while feat is not None:
	if not global_src_extent:
	# use local source extent
	# fastest option when you have fast disks and well indexed raster (ie tiled Geotiff)
	# advantage: each feature uses the smallest raster chunk
	# disadvantage: lots of reads on the source raster
	src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope())
	src_offset = clip_offsets_to_raster(src_offset,rds)
	# what follows should read as
	# if src_offset is None :
	# feat=vlyr.GetNextFeature()
	# continue
	#
	# Unfortunately, ogr has a bug that makes the while
	# loop spin infinitely if a continue statement is used.
	if src_offset is not None :
	src_array = rb.ReadAsArray(*src_offset)
	src_array = ma.masked_values(src_array, rb.GetNoDataValue())
	if valid_range is not None:
	src_array = ma.masked_outside(src_array,valid_range[0],valid_range[1])
	if mask_path is not None :
	mask_array = mb.ReadAsArray(*src_offset)

	# calculate new geotransform of the feature subset
	new_gt = (
	(rgt[0] + (src_offset[0] * rgt[1])),
	rgt[1],
	0.0,
	(rgt[3] + (src_offset[1] * rgt[5])),
	0.0,
	rgt[5]
	)

	# workaround for python/ogr bug involving continue statements
	# see above.
	if src_offset is not None:

	# Create a temporary vector layer in memory
	mem_ds = mem_drv.CreateDataSource('out')
	mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
	mem_layer.CreateFeature(feat.Clone())

	# Rasterize it
	rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
	rvds.SetGeoTransform(new_gt)
	gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
	rv_array = rvds.ReadAsArray()

	# Mask the source data array with our current feature
	# we take the logical_not to flip 0<->1 to get the correct mask effect
	# we also mask out nodata values explictly
	masked = mask_fn(src_array, rv_array, nodata_value, mask_array)

	# Compute stats only if there are one or more pixels remaining (unmasked)
	if masked.count() > 0 :
	feature_stats = calc_fn(masked)
	feature_stats['fid'] = int(feat.GetFID())
	stats.append(feature_stats)

	rvds = None
	mem_ds = None

	feat = vlyr.GetNextFeature()

	vds = None
	rds = None
	if mask_path is not None :
	mds = None
	return stats

	def calc_range(vector_path, raster_path) :
	stats = zonal_stats(vector_path, raster_path, only_global=True)
	return (stats[0]['min'], stats[0]['max'])

	def calc_range_many_rasters(vector_path, raster_path_array) :
	totalrange = None
	for raster_path in raster_path_array :
	cur_range = calc_range(vector_path, raster_path)
	if totalrange is None :
	totalrange = list(cur_range)
	else :
	totalrange[0] = min(totalrange[0],cur_range[0])
	totalrange[1] = max(totalrange[1],cur_range[1])

	return totalrange

	def zonal_histogram(vector_path, raster_path,
	histobins=10, historange=None) :

	# go compute the range if necessary
	if historange is None :
	historange = calc_range(vector_path, raster_path)

	# does the number of bins need modifying?
	histobins = calc_histobins(histobins, historange)

	#compose the stats function
	histo_fn = partial(calc_histogram, histobins, historange)

	# go compute the zonal histogram, using the same bins for all zones/fids
	return zonal_stats(vector_path, raster_path, calc_fn=histo_fn)

	def make_histogram_frame(stats) :
	"""Processes stats from zonal histogram to return a data frame containing
	only the histogram data. Fid + bins are columns. Each row contains data for
	one fid."""

	# post process the results into a data frame where the
	fids = [ i['fid'] for i in stats ]
	num_histobins = len(stats[0]['histogram'][0])
	histobins = [None] * num_histobins
	for i in range(num_histobins) :
	histobins[i] = [ item['histogram'][0][i] for item in stats ]

	tabledef = c.OrderedDict()
	tabledef['FID'] = fids
	histo_edges = stats[0]['histogram'][1]
	for i in range(num_histobins) :
	tabledef['Bin_{}'.format(histo_edges[i])] = histobins[i]

	return pd.DataFrame(tabledef)

	def make_stats_frame(stats) :
	return pd.DataFrame(stats)

	if __name__ == "__main__":
	opts = {'VECTOR': sys.argv[1], 'RASTER': sys.argv[2]}
	stats = zonal_stats(opts['VECTOR'], opts['RASTER'])

	try:
	from pandas import DataFrame
	print DataFrame(stats)
	except ImportError:
	import json
	print json.dumps(stats, indent=2)
	$ time python zonal_stats.py test.shp terrain/slope.tif
	count fid max mean min std sum
	0 203 0 96 65.876847 3 17.968489 13373
	1 130 1 90 60.100000 3 16.728994 7813
	2 1341 2 102 53.211037 2 17.901655 71356
	3 130 3 90 60.100000 3 16.728994 7813
	4 132 4 64 15.962121 1 15.360519 2107
	5 132 5 53 31.515152 17 7.970100 4160
	6 131 6 42 9.893130 0 8.168317 1296
	7 132 7 64 28.712121 2 14.853594 3790
	8 133 8 54 35.548872 11 8.878856 4728
	9 131 9 82 52.297710 4 17.349877 6851
	10 131 10 11 3.030534 0 1.781752 397
	11 134 11 57 10.156716 1 11.960042 1361
	12 133 12 45 19.000000 0 13.727750 2527
	13 132 13 64 26.507576 1 18.848075 3499
	14 132 14 94 52.787879 1 22.297585 6968
	15 131 15 84 19.450382 1 15.992944 2548
	16 132 16 52 11.583333 0 11.538501 1529
	17 132 17 108 53.515152 6 18.198603 7064
	18 341 18 76 39.117302 9 11.540482 13339
	19 337 19 57 19.988131 4 9.593512 6736
	20 336 20 78 48.636905 11 13.357014 16342
	21 338 21 3 0.855030 0 0.527067 289
	22 337 22 34 5.347181 0 7.069888 1802
	23 341 23 0 0.000000 0 0.000000 0
	24 341 24 42 16.612903 0 9.041271 5665
	25 337 25 128 78.848665 5 18.689028 26572
	26 341 26 29 7.973607 1 5.341357 2719
	27 339 27 78 35.616519 5 14.455317 12074
	28 341 28 65 20.199413 0 16.636394 6888
	29 340 29 84 35.855882 1 17.022989 12191
	30 338 30 96 61.440828 2 16.703587 20767
	31 340 31 101 57.832353 8 18.161971 19663

	real 0m1.311s
	user 0m0.372s
	sys 0m0.752s


	#### Starspan equivalent
	$ time starspan --vector test.shp --out-prefix testout --out-type table \
	--summary-suffix _stats.csv --raster terrain/slope.tif \
	--stats avg mode median min max sum stdev nulls && \
	cat testout_stats.csv
	1: Extracting from /usr/local/apps/land_owner_tools/lot/fixtures/downloads/terrain/slope.tif
	Summary:
	Intersecting features: 32
	Polygons: 32

	Processed pixels: 8379

	real 0m1.440s
	user 0m0.944s
	sys 0m0.296s