Skip to content

Instantly share code, notes, and snippets.

@phobson
Last active November 16, 2020 18:09
Show Gist options
  • Save phobson/b48dd42b377a4d88f4e302962ef1fa40 to your computer and use it in GitHub Desktop.
Save phobson/b48dd42b377a4d88f4e302962ef1fa40 to your computer and use it in GitHub Desktop.
Arcpy vs Geopandas
import sys
import geopandas
from helpers import width, peak_flow
def flow_column_name(return_period):
    """Map a return-period string (e.g. '0.5') to a flow column name (e.g. 'Q5')."""
    scaled = int(float(return_period) * 10)
    return f"Q{scaled:d}"
def populate_column(table, column, fxn, **kwargs):
    """Return a copy of *table* with *column* computed by applying *fxn* row-wise.

    Extra keyword arguments are forwarded to ``DataFrame.apply``.
    """
    def _rowwise(df):
        return df.apply(fxn, axis=1, **kwargs)

    return table.assign(**{column: _rowwise})
def compute_width_and_flow(filename):
    """Compute the channel-width column and one peak-flow column per storm
    return period for every feature in *filename*, overwrite the source
    file with the results, and return the resulting table.
    """
    # list of all storm event return periods
    return_periods = ['50', '20', '10', '4', '2', '1', '0.5', '0.2']
    flow_columns = list(map(flow_column_name, return_periods))
    width_col = 'width'

    # load the attribute table
    table = geopandas.read_file(filename)

    # compute the channel width
    # (use width_col rather than repeating the 'width' literal; the unused
    #  result_columns local was removed as dead code)
    table = populate_column(table, width_col, width)

    # compute the flows, one column per return period
    for fc in flow_columns:
        table = populate_column(table, fc, peak_flow, colname=fc)

    # write results back over the source file
    table.to_file(filename)
    return table
# script entry point: expects the path of the shapefile to process as the
# first command-line argument
if __name__ == '__main__':
    compute_width_and_flow(sys.argv[1])
import sys
import numpy
import arcpy
from helpers import width, peak_flow
def compute_width_and_flow(filename):
    """Compute the channel-width column and one peak-flow column per storm
    return period for every feature in *filename* (arcpy version), writing
    the results back into the feature class attribute table.
    """
    # list of all storm event return periods
    return_periods = ['50', '20', '10', '4', '2', '1', '0.5', '0.2']
    flow_columns = list(map(flow_column_name, return_periods))
    width_col = 'width'
    result_columns = flow_columns + [width_col]

    # load the attribute table, excluding the columns we are about to rebuild
    table = load_attr_table(filename, cols_to_exclude=result_columns)

    # add each result column, initialized to a no-data sentinel
    for col in result_columns:
        # BUG FIX: the original passed the undefined name `column_name`
        # here instead of the loop variable `col` (NameError at runtime)
        table = add_column_to_table(table, col, -9999.9)

    # push the freshly-added columns back out to the file.
    # BUG FIX: the original called update_attr_table with only two arguments
    # against its five-parameter signature (TypeError at runtime).
    # NOTE(review): 'OBJECTID' is assumed to be the feature ID field -- confirm
    update_attr_table(filename, table, 'OBJECTID', result_columns, result_columns)

    # compute the channel width
    populate_column(filename, width_col, width, keyfields=['DRNAREA', 'AggregateEcoregion'])

    # compute the flows, one column per return period
    for rp in return_periods:
        column_name = flow_column_name(rp)
        # BUG FIX: write into the derived flow column (e.g. 'Q500'), not the
        # raw return-period string; the unused `other_fields` local was removed
        populate_column(filename, column_name, peak_flow)
    return table
def load_attr_table(filename, cols_to_exclude):
    """Read the attribute table of *filename* into a NumPy structured array,
    skipping date-typed fields and any field named in *cols_to_exclude*.
    """
    keep = [
        f.name
        for f in arcpy.ListFields(filename)
        if f.type != 'Date' and f.name not in cols_to_exclude
    ]
    return arcpy.da.FeatureClassToNumPyArray(in_table=filename, field_names=keep)
def update_attr_table(filename, table, id_col, orig_columns, new_columns):
    """Write values from *table* back into the feature class at *filename*.

    Rows are matched on *id_col*; for each matched row, the values of
    *new_columns* in *table* overwrite *orig_columns* in the file.
    Returns *filename*.
    """
    all_columns = [id_col] + orig_columns
    with arcpy.da.UpdateCursor(filename, all_columns) as cur:
        for oldrow in cur:
            # oldrow[0] is the ID value because id_col is first in all_columns
            newrow = find_row_in_table(table, id_col, oldrow[0])
            if newrow is not None:
                # BUG FIX: the original spelled this `enmumerate`
                # (NameError at runtime); start=1 skips the ID column
                for n, col in enumerate(new_columns, 1):
                    oldrow[n] = newrow[col]
                cur.updateRow(oldrow)
    return filename
def find_row_in_table(table, column, value):
    """Return the single row of *table* whose *column* equals *value*.

    Returns None when no row matches; raises ValueError when more than one
    row matches.
    """
    # BUG FIX: the original called len() on a py3 `filter` iterator
    # (TypeError) and then did `row = row[0]`, referencing `row` before
    # assignment -- materialize the matches as a list instead
    matches = [row for row in table if row[column] == value]
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]
    raise ValueError('too many rows where {} == {}'.format(column, value))
def flow_column_name(return_period):
    """Map a return-period string (e.g. '0.5') to a flow column name (e.g. 'Q5')."""
    return "Q%d" % int(float(return_period) * 10)
def populate_column(table, column, fxn, **kwargs):
    """Return a (shallow) copy of *table* with *column* set to fxn(row) for
    each row; extra keyword arguments are forwarded to *fxn*.
    """
    filled = table.copy()
    idx = 0
    for record in table:
        filled[idx][column] = fxn(record, **kwargs)
        idx += 1
    return filled
def add_column_to_table(array, new_column, new_values):
    """Append *new_column* to the structured *array*; a scalar value is
    broadcast across every record.
    """
    from numpy.lib.recfunctions import append_fields

    # broadcast a scalar fill value into one value per record
    if numpy.isscalar(new_values):
        new_values = numpy.full(array.shape[0], new_values)
    appended = append_fields(array, new_column, new_values)
    # drop the masked-array wrapper that append_fields returns
    return appended.data
# script entry point: expects the path of the feature class to process as
# the first command-line argument
if __name__ == '__main__':
    compute_width_and_flow(sys.argv[1])
@kuanb
Copy link

kuanb commented Jul 19, 2017

This is interesting. I don't use ArcPy but would be curious to see what its performance is like and whether it leverages vectorization to process spatial data more quickly than GeoPandas.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment