Skip to content

Instantly share code, notes, and snippets.

@lehostert
Last active May 6, 2021 21:02
Show Gist options
  • Save lehostert/7794a629f99a78b82c13d2e013098fb7 to your computer and use it in GitHub Desktop.
Save lehostert/7794a629f99a78b82c13d2e013098fb7 to your computer and use it in GitHub Desktop.
read and summarize USGS gauge data
#### For Loop ####
setwd("~/GitHub/Gists/gauge_summary/data")
### This is an example of using the tidyverse with functional programming to get a df/ tibble of the gauges called "gauge list"
### THis is a tibble/dataframe and not a list
gauge_list <- sites %>%
select(gauge_station) %>%
unique()
### If you wanted to do it this was and then you need to explictly specify that
gauge_list_actual <- sites %>%
select(gauge_station) %>%
unique() %>%
as.list()
# OR
also_a_list <- as.list(gauge_list)
### In order to use for loops you must have an iterable to be working over. This can be a vector or a list but not a dataframe
## If you select one column from a data frame it is a vector and therefore usable
is.vector(sites$gauge_station)
is.vector(gauge_list)
is.vector(gauge_list_actual)
is.vector(also_a_list)
### the value that index value (part before 'in') can be anything because it is a variable. I like to call theme something
### that helps be remember the purpose of the list but I could call it 'x' below and it would work the same.
## For Loop Example
for (gauge in unique(sites$gauge_station)) {
date_start <- "2019-05-31"
date_end <- "2019-07-01"
get_usgs(gauge, date_start, date_end)
website <- paste0("Website for ",gauge," is: ","https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",gauge,"&period=&begin_date=",date_start,"&end_date=",date_end)
print(website)
}
### Nested loop examples
## Every combination of x & y
for (x in sites$site) {
for (y in sites$date) {
print(paste(x,y))
}
}
###
#### Puuur map Example
printer <- function(gauge_no, sample_date){
print(paste("Gauge Station", gauge_no, "should be sampled on", sample_date))
}
printer(555, "2020-10-04")
### Attempt to try this pmpa thing
printer <- function(gauge_station, date, ...){
x <- paste("Gauge Station", gauge_station, "should be sampled on", date)
print(x)
}
purrr::pmap(sites, printer)
#### try it with a simple function
sum_one <- function(df, event_date, ...){
one <- df %>%
filter(lubridate::date(datetime) == event_date) %>%
group_by(site_no) %>%
summarize(date = event_date,
mean_gage = mean(gage_height_ft),
mean_discharge = mean(discharge_cfs),
mean_precip = mean(precip_in))
return(one)
}
c <- sum_one(copper2,"2019-06-23")
#TODO Can you use next() to go through a whole list of sites.
#### Toy examples
for (gauge in unique(sites$gauge_station)) {
print(gauge)
}
#### Below here be dragons ####
sum_two <- function(x,y){
s <- x + y
return(s)
}
sum_two(3,2)
data <- read.table(text="x y
1 2
2 3
3 4
4 5", header=TRUE)
length(data[,2])
for (x in 1:nrow(data[1])) {
for(y in 1:nrow(data[2])){
result <- sum_two(x, y)
print(result)
}
}
x <- c(1,2,3,4)
y <- c(2,3,4,5)
for (i in 1:length(x)) {
for(i in 1:length(y)){
result <- sum_two(x[i], y[i])
print(result)
}
}
###@ purrr::map Function ####
purrr::pmap_dfr(,sum_two)
purrr::map(sites, "gauge_station")
library(tidyverse)
library(lubridate)
#Set your current working directory so that you will know where any generated files are going
# setwd()
## Read in example data set with sites, sample dates, and closest USGS station locations
sites <- read.table(text="site date gauge_station
A 2018-06-23 05590050
B 2018-07-21 05590050
C 2018-08-24 05590050
D 2018-09-30 05591700", header=TRUE, colClasses = c("character", "Date", "character"))
## Make sure our dates are actually set to class "Date" for future calculations
sites$date <- as.Date(sites$date)
## function for reading in data from USGS website as .TSV/RDB and saving as a .csv in your current working directory
get_usgs <- function(station_code, date_start, date_end){
df <- readr::read_tsv(file = paste0("https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",station_code,"&period=&begin_date=",date_start,"&end_date=",date_end),
col_names = c("agency", "site_no", "datetime", "timezone",
"gage_height_ft", "gage_height_cd",
"discharge_cfs", "discharge_cd",
"precip_in", "precip_cd"),
col_types = "ccTcncncnc",
comment = "#")
df <- df[3:nrow(df),]
write_csv(df, path = paste0("USGS_",station_code,"_",date_start,"_to_",date_end,"_accessed_",today(),".csv"))
return(df)
}
## Use the function with the USGS site number, the start date of the data you want and the end date of the data that you want
copper2 <- get_usgs("05590050", "2019-06-02", "2019-07-01")
west_okaw <- get_usgs("05591700", "2018-05-01", "2018-11-01")
west_okaw2 <- read.csv(file = "~/USGS_05591700_2018-05-01_to_2018-11-01_accessed_2021-04-09")
## Now that you have the data you can generate the summaries that you are interested in like the mean of each parameter in the data for your survey date
sum_one <- function(df, event_date){
one <- df %>%
filter(lubridate::date(datetime) == event_date) %>%
group_by(site_no) %>%
summarize(date = event_date,
mean_gage = mean(gage_height_ft),
mean_discharge = mean(discharge_cfs),
mean_precip = mean(precip_in))
return(one)
}
## Example for the same day summary
c <- sum_one(west_okaw2,"2018-06-23")
## You can also create a more generic function to generate summaries for any number of days before the sampling event
#### Sum generic ####
sum_gen <- function(df, event_date, days_before){
gen <- df %>%
filter(lubridate::date(datetime) < lubridate::ymd(event_date) + days(1) & lubridate::date(datetime) > lubridate::ymd(event_date) - days(1+days_before)) %>%
group_by(site_no) %>%
summarize(date = event_date,
mean_gage = mean(gage_height_ft),
mean_discharge = mean(discharge_cfs),
mean_precip = mean(precip_in))
return(gen)
}
## Examples for the same day summaries (0 days before), 3 day summary, and 7 day summary
c0 <-sum_gen(west_okaw2, "2018-06-23", 0)
c3 <-sum_gen(west_okaw2, "2018-06-23", 3)
c7 <-sum_gen(west_okaw2, "2018-06-23", 7)
## You could then create a function to read over all of the data that you need and calculate the summaries using
## the generic function that was created above
data_summary <- function(df, date){
one <- sum_gen(df, date, 0)
three <- sum_gen(df, date, 3)
seven <- sum_gen(df, date, 7)
dat <- one %>%
full_join(three, by = c("site_no", "date"), suffix = c("", ".3Day")) %>%
full_join(seven, by = c("site_no", "date"), suffix = c("", ".7Day"))
write_csv(dat, path = paste0(df,"_usgs_gauge_summary.csv"))
return(dat)
}
cs_sum <- data_summary(copper, "2018-06-23")
wo_sum <- data_summary(west_okaw2, "2018-06-23")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment