Last active
May 6, 2021 21:02
-
-
Save lehostert/7794a629f99a78b82c13d2e013098fb7 to your computer and use it in GitHub Desktop.
read and summarize USGS gauge data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### For Loop #### | |
setwd("~/GitHub/Gists/gauge_summary/data") | |
### This is an example of using the tidyverse with functional programming to get a df/ tibble of the gauges called "gauge list" | |
### THis is a tibble/dataframe and not a list | |
gauge_list <- sites %>% | |
select(gauge_station) %>% | |
unique() | |
### If you wanted to do it this was and then you need to explictly specify that | |
gauge_list_actual <- sites %>% | |
select(gauge_station) %>% | |
unique() %>% | |
as.list() | |
# OR | |
also_a_list <- as.list(gauge_list) | |
### In order to use for loops you must have an iterable to be working over. This can be a vector or a list but not a dataframe | |
## If you select one column from a data frame it is a vector and therefore usable | |
is.vector(sites$gauge_station) | |
is.vector(gauge_list) | |
is.vector(gauge_list_actual) | |
is.vector(also_a_list) | |
### the value that index value (part before 'in') can be anything because it is a variable. I like to call theme something | |
### that helps be remember the purpose of the list but I could call it 'x' below and it would work the same. | |
## For Loop Example | |
for (gauge in unique(sites$gauge_station)) { | |
date_start <- "2019-05-31" | |
date_end <- "2019-07-01" | |
get_usgs(gauge, date_start, date_end) | |
website <- paste0("Website for ",gauge," is: ","https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",gauge,"&period=&begin_date=",date_start,"&end_date=",date_end) | |
print(website) | |
} | |
### Nested loop examples | |
## Every combination of x & y | |
for (x in sites$site) { | |
for (y in sites$date) { | |
print(paste(x,y)) | |
} | |
} | |
### | |
#### Puuur map Example | |
printer <- function(gauge_no, sample_date){ | |
print(paste("Gauge Station", gauge_no, "should be sampled on", sample_date)) | |
} | |
printer(555, "2020-10-04") | |
### Attempt to try this pmpa thing | |
printer <- function(gauge_station, date, ...){ | |
x <- paste("Gauge Station", gauge_station, "should be sampled on", date) | |
print(x) | |
} | |
purrr::pmap(sites, printer) | |
#### try it with a simple function | |
sum_one <- function(df, event_date, ...){ | |
one <- df %>% | |
filter(lubridate::date(datetime) == event_date) %>% | |
group_by(site_no) %>% | |
summarize(date = event_date, | |
mean_gage = mean(gage_height_ft), | |
mean_discharge = mean(discharge_cfs), | |
mean_precip = mean(precip_in)) | |
return(one) | |
} | |
c <- sum_one(copper2,"2019-06-23") | |
#TODO Can you use next() to go through a whole list of sites. | |
#### Toy examples | |
for (gauge in unique(sites$gauge_station)) { | |
print(gauge) | |
} | |
#### Below here be dragons #### | |
sum_two <- function(x,y){ | |
s <- x + y | |
return(s) | |
} | |
sum_two(3,2) | |
data <- read.table(text="x y | |
1 2 | |
2 3 | |
3 4 | |
4 5", header=TRUE) | |
length(data[,2]) | |
for (x in 1:nrow(data[1])) { | |
for(y in 1:nrow(data[2])){ | |
result <- sum_two(x, y) | |
print(result) | |
} | |
} | |
x <- c(1,2,3,4) | |
y <- c(2,3,4,5) | |
for (i in 1:length(x)) { | |
for(i in 1:length(y)){ | |
result <- sum_two(x[i], y[i]) | |
print(result) | |
} | |
} | |
###@ purrr::map Function #### | |
purrr::pmap_dfr(,sum_two) | |
purrr::map(sites, "gauge_station") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(lubridate) | |
#Set your current working directory so that you will know where any generated files are going | |
# setwd() | |
## Read in example data set with sites, sample dates, and closest USGS station locations | |
sites <- read.table(text="site date gauge_station | |
A 2018-06-23 05590050 | |
B 2018-07-21 05590050 | |
C 2018-08-24 05590050 | |
D 2018-09-30 05591700", header=TRUE, colClasses = c("character", "Date", "character")) | |
## Make sure our dates are actually set to class "Date" for future calculations | |
sites$date <- as.Date(sites$date) | |
## function for reading in data from USGS website as .TSV/RDB and saving as a .csv in your current working directory | |
get_usgs <- function(station_code, date_start, date_end){ | |
df <- readr::read_tsv(file = paste0("https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",station_code,"&period=&begin_date=",date_start,"&end_date=",date_end), | |
col_names = c("agency", "site_no", "datetime", "timezone", | |
"gage_height_ft", "gage_height_cd", | |
"discharge_cfs", "discharge_cd", | |
"precip_in", "precip_cd"), | |
col_types = "ccTcncncnc", | |
comment = "#") | |
df <- df[3:nrow(df),] | |
write_csv(df, path = paste0("USGS_",station_code,"_",date_start,"_to_",date_end,"_accessed_",today(),".csv")) | |
return(df) | |
} | |
## Use the function with the USGS site number, the start date of the data you want and the end date of the data that you want | |
copper2 <- get_usgs("05590050", "2019-06-02", "2019-07-01") | |
west_okaw <- get_usgs("05591700", "2018-05-01", "2018-11-01") | |
west_okaw2 <- read.csv(file = "~/USGS_05591700_2018-05-01_to_2018-11-01_accessed_2021-04-09") | |
## Now that you have the data you can generate the summaries that you are interested in like the mean of each parameter in the data for your survey date | |
sum_one <- function(df, event_date){ | |
one <- df %>% | |
filter(lubridate::date(datetime) == event_date) %>% | |
group_by(site_no) %>% | |
summarize(date = event_date, | |
mean_gage = mean(gage_height_ft), | |
mean_discharge = mean(discharge_cfs), | |
mean_precip = mean(precip_in)) | |
return(one) | |
} | |
## Example for the same day summary | |
c <- sum_one(west_okaw2,"2018-06-23") | |
## You can also create a more generic function to generate summaries for any number of days before the sampling event | |
#### Sum generic #### | |
sum_gen <- function(df, event_date, days_before){ | |
gen <- df %>% | |
filter(lubridate::date(datetime) < lubridate::ymd(event_date) + days(1) & lubridate::date(datetime) > lubridate::ymd(event_date) - days(1+days_before)) %>% | |
group_by(site_no) %>% | |
summarize(date = event_date, | |
mean_gage = mean(gage_height_ft), | |
mean_discharge = mean(discharge_cfs), | |
mean_precip = mean(precip_in)) | |
return(gen) | |
} | |
## Examples for the same day summaries (0 days before), 3 day summary, and 7 day summary | |
c0 <-sum_gen(west_okaw2, "2018-06-23", 0) | |
c3 <-sum_gen(west_okaw2, "2018-06-23", 3) | |
c7 <-sum_gen(west_okaw2, "2018-06-23", 7) | |
## You could then create a function to read over all of the data that you need and calculate the summaries using | |
## the generic function that was created above | |
data_summary <- function(df, date){ | |
one <- sum_gen(df, date, 0) | |
three <- sum_gen(df, date, 3) | |
seven <- sum_gen(df, date, 7) | |
dat <- one %>% | |
full_join(three, by = c("site_no", "date"), suffix = c("", ".3Day")) %>% | |
full_join(seven, by = c("site_no", "date"), suffix = c("", ".7Day")) | |
write_csv(dat, path = paste0(df,"_usgs_gauge_summary.csv")) | |
return(dat) | |
} | |
cs_sum <- data_summary(copper, "2018-06-23") | |
wo_sum <- data_summary(west_okaw2, "2018-06-23") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment