Marc Weber mhweber

Spatial analysis, aquatic ecology, R, Python, open source science

mhweber / Combine_files_and_zip.py

Created September 10, 2020 00:54

Combine multiple .csv files into one pandas dataframe and write out to a zip file

	# -*- coding: utf-8 -*-
	"""
	Created on Wed Sep 9 16:46:15 2020

	@author: mweber
	"""


	# Combine hydro-region tables if desired
	import pandas as pd

mhweber / SpatialLinesEnpoints_sf

Created May 30, 2020 20:16

Derive endpoints for spatial lines in sf

mhweber / Mapview_wms.R

Last active January 28, 2020 18:31

R mapview display wms

	library(sf)
	library(dplyr)
	library(mapview)
	library(leaflet)

	m <- mapview()
	m@map = m@map %>% addWMSTiles(group = 'NHDPlus',
	"https://watersgeo.epa.gov/arcgis/services/NHDPlus_NP21/NHDSnapshot_NP21/MapServer/WmsServer?",
	layers = 4,
	options = WMSTileOptions(format = "image/png", transparent = TRUE),

mhweber / RasterToPoint.py

Created January 23, 2020 20:21

raster to point

	raster_2d = 2-dimensional array imported with gdal
	nodata = nodata value from gdal import for 2-d array
	geotransform = geotransform from gdal import of 2-d array

	def raster_to_point(raster_2d, nodata, geotransform):
	raster_2d = raster_2d.flatten()
	locs = np.where(raster_2d <> nodata)[0] #Get locs in array w/ data
	raster_2d = raster_2d[locs] #Reduce data to those locs
	geo = list(geotransform) #Make geotransform from raster a list
	x = locs % xsize #Convert flattened locations to x coord

mhweber / ArcpySelectQueryPassVariable.py

Created October 10, 2019 20:58

Pass a variable to an arcpy selection query statement

	import os, arcpy, sys

	# Walk every row of the layer and re-select the layer by that row's
	# "ColumnName" value, passing the value into the selection query.
	shp = 'F:/shapefile.shp'
	arcpy.MakeFeatureLayer_management(shp, "lyr")
	# A field name wrapped in single quotes is read as a string literal, not a
	# column reference; let arcpy add the delimiters this data source expects.
	field = arcpy.AddFieldDelimiters(shp, "ColumnName")
	rows = arcpy.SearchCursor("lyr")
	for row in rows:
	    name = str(row.getValue("ColumnName"))
	    # Double any embedded single quote so the value stays one SQL literal.
	    where = "%s = '%s'" % (field, name.replace("'", "''"))
	    arcpy.SelectLayerByAttribute_management("lyr", "NEW_SELECTION", where)
	# Legacy cursors are released by deleting the cursor object.
	del rows

mhweber / summarize_group_most_common.R

Created July 29, 2019 23:05

Dplyr summarize to get most frequently occurring categorical value by a grouping variable

	library("Lahman")
	library("dplyr")  # supplies %>%, group_by() and summarize() used below

	# Aggregation: for each playerID, report the teamID that occurs in the most
	# Batting rows. exclude = NULL lets table() count NA as its own level.
	Batting %>%
	  group_by(playerID) %>%
	  summarize(most_team = names(which.max(table(teamID, exclude = NULL))))

mhweber / GeoPandas_Rasterio_Clipping.py

Last active October 31, 2025 13:37

Clipping rasters with GeoPandas and Rasterio

	import geopandas as gpd
	import rasterio
	from rasterio.mask import mask
	from rasterio.plot import show
	from shapely.geometry import mapping
	import matplotlib.pyplot as plt

	# Read the shapefile into a GeoDataFrame.
	df = gpd.read_file('myfile.shp')

	# Keep only the rows whose 'Field' column equals 'SomeValue'.
	keep = df['Field'] == 'SomeValue'
	df = df.loc[keep]

mhweber / State_Hydro_StreamCat_Download.R

Last active November 7, 2022 18:51

Download all StreamCat metrics for specific state or hydroregion

	library(RCurl)
	library(stringr)
	# Directory whose listing is scraped for files to download.
	url <- "https://gaftp.epa.gov/EPADataCommons/ORD/NHDPlusLandscapeAttributes/StreamCat/States/" # can use hydro dir here instead

	# Fetch the directory listing as one string.
	# NOTE(review): ssl.verifypeer = FALSE disables certificate verification - confirm this is still required.
	filenames <- getURL(url, userpwd="", ftp.use.epsv = FALSE, verbose=FALSE,dirlistonly = TRUE, ssl.verifypeer = FALSE)
	# Split the listing into one entry per line, tolerating CRLF line endings.
	destnames <- strsplit(filenames, "\r*\n")[[1]]
	# fixed = TRUE matches "_OR.zip" literally; as a regex the "." would match any character.
	destnames <- destnames[grep("_OR.zip", destnames, fixed = TRUE)] # different state - or hydroregion if in hydroregion directory
	setwd('somedir/subdir') # where you want to download
	for (d in destnames){
	d <- str_match_all(d, "href=\"(.*?)\"")

mhweber / ListCompFileRename.py

Created November 30, 2018 22:41

Rename multiple files in a directory using list comprehension

	import os
	path = 'some/path'
	# os.listdir() returns bare file names, so both the source and the target are
	# joined back onto `path`; without that the rename only works when the
	# current working directory happens to be `path`. The replacement is applied
	# to the file name only, never to the directory part.
	[os.rename(os.path.join(path, f), os.path.join(path, f.replace('some_text', 'new_text')))
	 for f in os.listdir(path) if 'text_to_look_for' in f]

mhweber / groupby_agg.py

Created July 27, 2018 21:07

pandas groupby and aggregation example

	import pandas as pd
	# Example frame: one 'value' observation per row, tagged with its 'year'.
	df = pd.DataFrame({'year' : pd.Series([1961, 1961, 1961, 1962, 1962,1963, 1963]),
	                   'value' : pd.Series([0.4, 0.5, 0.2, 0.8, 0.9, 0.8, 0.7])})
	# transform('max') broadcasts each year's maximum back onto every row of that
	# year. Assigning the aggregated maxima wrapped in a new Series would align
	# on the default 0..n-1 index instead, filling only the first rows (with
	# other years' maxima) and leaving the rest NaN.
	df['max_values'] = df.groupby('year')['value'].transform('max')