lehostert · May 6, 2021 21:02
diff --git a/gauge_loops.R b/gauge_loops.R
 #### For Loop ####

 setwd("~/GitHub/Gists/gauge_summary/data")

 ### This is an example of using the tidyverse with functional programming to get a df/ tibble of the gauges called "gauge list"
 ### THis is a tibble/dataframe and not a list
 gauge_list <- sites %>% 
  select(gauge_station) %>% 
  unique()

 ### If you wanted to do it this was and then you need to explictly specify that
 gauge_list_actual <- sites %>% 
  select(gauge_station) %>% 
  unique() %>% 
  as.list()

 # OR
 also_a_list <- as.list(gauge_list)

 ### In order to use for loops you must have an iterable to be working over. This can be a vector or a list but not a dataframe
 ## If you select one column from a data frame it is a vector and therefore usable
 is.vector(sites$gauge_station)
 is.vector(gauge_list)
 is.vector(gauge_list_actual)
 is.vector(also_a_list)

 ### the value that index value (part before 'in') can be anything because it is a variable. I like to call theme something 
 ### that helps be remember the purpose of the list but I could call it 'x' below and it would work the same. 

 ## For Loop Example
 for (gauge in unique(sites$gauge_station)) {
  date_start <- "2019-05-31"
  date_end <- "2019-07-01"
   get_usgs(gauge, date_start, date_end)
   website <- paste0("Website for ",gauge," is: ","https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",gauge,"&period=&begin_date=",date_start,"&end_date=",date_end)
   print(website)
 }



 ### Nested loop examples

 ## Every combination of x & y
 for (x in sites$site) {
  for (y in sites$date) {
    print(paste(x,y))
  }
 }

 ###


 #### Puuur map Example

 printer <- function(gauge_no, sample_date){
  print(paste("Gauge Station", gauge_no, "should be sampled on", sample_date))
 }


 printer(555, "2020-10-04")

 ### Attempt to try this pmpa thing
 printer <- function(gauge_station, date, ...){
  x <- paste("Gauge Station", gauge_station, "should be sampled on", date)
  print(x)
 }


 purrr::pmap(sites, printer)


 #### try it with a simple function

 sum_one <- function(df, event_date, ...){
  one <- df %>% 
    filter(lubridate::date(datetime) == event_date) %>% 
    group_by(site_no) %>% 
    summarize(date = event_date,
              mean_gage = mean(gage_height_ft),
              mean_discharge = mean(discharge_cfs),
              mean_precip = mean(precip_in))
  return(one)
 }

 c <- sum_one(copper2,"2019-06-23")








 #TODO Can you use next() to go through a whole list of sites. 


 #### Toy examples

 for (gauge in unique(sites$gauge_station)) {
  
  print(gauge)
 }





 #### Below here be dragons ####
 sum_two <- function(x,y){
  s <- x + y
  return(s)
 }

 sum_two(3,2)

 data <- read.table(text="x  y
 1 2
 2 3
 3 4
 4 5", header=TRUE)

 length(data[,2])

 for (x in 1:nrow(data[1])) {
   for(y in 1:nrow(data[2])){
     result <- sum_two(x, y)
     print(result)
   }
 }



 x <- c(1,2,3,4)
 y <- c(2,3,4,5)

 for (i in 1:length(x)) {
  for(i in 1:length(y)){
    result <- sum_two(x[i], y[i])
    print(result)
  }
 }
 
 ###@ purrr::map Function ####

 purrr::pmap_dfr(,sum_two)


 purrr::map(sites, "gauge_station")
diff --git a/usgs_gauge_data.R b/usgs_gauge_data.R
 library(tidyverse)
 library(lubridate)

 #Set your current working directory so that you will know where any generated files are going
 # setwd()

 ## Read in example data set with sites, sample dates, and closest USGS station locations
 sites <- read.table(text="site  date    gauge_station
 A    2018-06-23    05590050
 B    2018-07-21    05590050
 C    2018-08-24    05590050
 D    2018-09-30    05591700", header=TRUE, colClasses = c("character", "Date", "character"))

 ## Make sure our dates are actually set to class "Date" for future calculations
 sites$date <- as.Date(sites$date)

 ## function for reading in data from USGS website as .TSV/RDB and saving as a .csv in your current working directory
 get_usgs <- function(station_code, date_start, date_end){
  df <- readr::read_tsv(file = paste0("https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",station_code,"&period=&begin_date=",date_start,"&end_date=",date_end),
                         col_names = c("agency", "site_no", "datetime", "timezone",
                                       "gage_height_ft", "gage_height_cd",
                                       "discharge_cfs", "discharge_cd",
                                       "precip_in", "precip_cd"),
                        col_types = "ccTcncncnc",
                        comment = "#")
  df <- df[3:nrow(df),]
  write_csv(df, path = paste0("USGS_",station_code,"_",date_start,"_to_",date_end,"_accessed_",today(),".csv"))
  return(df)
  }


 ## Use the function with the USGS site number, the start date of the data you want and the end date of the data that you want
 copper2 <- get_usgs("05590050", "2019-06-02", "2019-07-01")
 west_okaw <- get_usgs("05591700", "2018-05-01", "2018-11-01")

 west_okaw2 <- read.csv(file = "~/USGS_05591700_2018-05-01_to_2018-11-01_accessed_2021-04-09")

 ## Now that you have the data you can generate the summaries that you are interested in like the mean of each parameter in the data for your survey date
 sum_one <- function(df, event_date){
  one <- df %>% 
    filter(lubridate::date(datetime) == event_date) %>% 
    group_by(site_no) %>% 
    summarize(date = event_date,
              mean_gage = mean(gage_height_ft),
              mean_discharge = mean(discharge_cfs),
              mean_precip = mean(precip_in))
  return(one)
 }

 ## Example for the same day summary
 c <- sum_one(west_okaw2,"2018-06-23")

 ## You can also create a more generic function to generate summaries for any number of days before the sampling event
 #### Sum generic ####

 sum_gen <- function(df, event_date, days_before){
  gen <- df %>% 
    filter(lubridate::date(datetime) < lubridate::ymd(event_date) + days(1) & lubridate::date(datetime) > lubridate::ymd(event_date) - days(1+days_before)) %>% 
    group_by(site_no) %>% 
    summarize(date = event_date,
              mean_gage = mean(gage_height_ft),
              mean_discharge = mean(discharge_cfs),
              mean_precip = mean(precip_in))
  return(gen)
 }

 ## Examples for the same day summaries (0 days before), 3 day summary, and 7 day summary
 c0 <-sum_gen(west_okaw2, "2018-06-23", 0)
 c3 <-sum_gen(west_okaw2, "2018-06-23", 3)
 c7 <-sum_gen(west_okaw2, "2018-06-23", 7)

 ## You could then create a function to read over all of the data that you need and calculate the summaries using 
 ## the generic function that was created above

 data_summary <- function(df, date){
  one <- sum_gen(df, date, 0)
  three <- sum_gen(df, date, 3)
  seven <- sum_gen(df, date, 7)
  dat <- one %>% 
    full_join(three, by = c("site_no", "date"), suffix = c("", ".3Day")) %>% 
    full_join(seven, by = c("site_no", "date"), suffix = c("", ".7Day"))
  write_csv(dat, path = paste0(df,"_usgs_gauge_summary.csv"))
  return(dat)
 }

 cs_sum <- data_summary(copper, "2018-06-23")
 wo_sum <- data_summary(west_okaw2, "2018-06-23")
	#### For Loop ####

	setwd("~/GitHub/Gists/gauge_summary/data")

	### This is an example of using the tidyverse with functional programming to get a df/ tibble of the gauges called "gauge list"
	### THis is a tibble/dataframe and not a list
	gauge_list <- sites %>%
	select(gauge_station) %>%
	unique()

	### If you wanted to do it this was and then you need to explictly specify that
	gauge_list_actual <- sites %>%
	select(gauge_station) %>%
	unique() %>%
	as.list()

	# OR
	also_a_list <- as.list(gauge_list)

	### In order to use for loops you must have an iterable to be working over. This can be a vector or a list but not a dataframe
	## If you select one column from a data frame it is a vector and therefore usable
	is.vector(sites$gauge_station)
	is.vector(gauge_list)
	is.vector(gauge_list_actual)
	is.vector(also_a_list)

	### the value that index value (part before 'in') can be anything because it is a variable. I like to call theme something
	### that helps be remember the purpose of the list but I could call it 'x' below and it would work the same.

	## For Loop Example
	for (gauge in unique(sites$gauge_station)) {
	date_start <- "2019-05-31"
	date_end <- "2019-07-01"
	get_usgs(gauge, date_start, date_end)
	website <- paste0("Website for ",gauge," is: ","https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",gauge,"&period=&begin_date=",date_start,"&end_date=",date_end)
	print(website)
	}



	### Nested loop examples

	## Every combination of x & y
	for (x in sites$site) {
	for (y in sites$date) {
	print(paste(x,y))
	}
	}

	###


	#### Puuur map Example

	printer <- function(gauge_no, sample_date){
	print(paste("Gauge Station", gauge_no, "should be sampled on", sample_date))
	}


	printer(555, "2020-10-04")

	### Attempt to try this pmpa thing
	printer <- function(gauge_station, date, ...){
	x <- paste("Gauge Station", gauge_station, "should be sampled on", date)
	print(x)
	}


	purrr::pmap(sites, printer)


	#### try it with a simple function

	sum_one <- function(df, event_date, ...){
	one <- df %>%
	filter(lubridate::date(datetime) == event_date) %>%
	group_by(site_no) %>%
	summarize(date = event_date,
	mean_gage = mean(gage_height_ft),
	mean_discharge = mean(discharge_cfs),
	mean_precip = mean(precip_in))
	return(one)
	}

	c <- sum_one(copper2,"2019-06-23")








	#TODO Can you use next() to go through a whole list of sites.


	#### Toy examples

	for (gauge in unique(sites$gauge_station)) {

	print(gauge)
	}





	#### Below here be dragons ####
	sum_two <- function(x,y){
	s <- x + y
	return(s)
	}

	sum_two(3,2)

	data <- read.table(text="x y
	1 2
	2 3
	3 4
	4 5", header=TRUE)

	length(data[,2])

	for (x in 1:nrow(data[1])) {
	for(y in 1:nrow(data[2])){
	result <- sum_two(x, y)
	print(result)
	}
	}



	x <- c(1,2,3,4)
	y <- c(2,3,4,5)

	for (i in 1:length(x)) {
	for(i in 1:length(y)){
	result <- sum_two(x[i], y[i])
	print(result)
	}
	}

	###@ purrr::map Function ####

	purrr::pmap_dfr(,sum_two)


	purrr::map(sites, "gauge_station")
	library(tidyverse)
	library(lubridate)

	#Set your current working directory so that you will know where any generated files are going
	# setwd()

	## Read in example data set with sites, sample dates, and closest USGS station locations
	sites <- read.table(text="site date gauge_station
	A 2018-06-23 05590050
	B 2018-07-21 05590050
	C 2018-08-24 05590050
	D 2018-09-30 05591700", header=TRUE, colClasses = c("character", "Date", "character"))

	## Make sure our dates are actually set to class "Date" for future calculations
	sites$date <- as.Date(sites$date)

	## function for reading in data from USGS website as .TSV/RDB and saving as a .csv in your current working directory
	get_usgs <- function(station_code, date_start, date_end){
	df <- readr::read_tsv(file = paste0("https://nwis.waterdata.usgs.gov/nwis/uv?cb_00045=on&cb_00060=on&cb_00065=on&format=rdb&site_no=",station_code,"&period=&begin_date=",date_start,"&end_date=",date_end),
	col_names = c("agency", "site_no", "datetime", "timezone",
	"gage_height_ft", "gage_height_cd",
	"discharge_cfs", "discharge_cd",
	"precip_in", "precip_cd"),
	col_types = "ccTcncncnc",
	comment = "#")
	df <- df[3:nrow(df),]
	write_csv(df, path = paste0("USGS_",station_code,"_",date_start,"_to_",date_end,"_accessed_",today(),".csv"))
	return(df)
	}


	## Use the function with the USGS site number, the start date of the data you want and the end date of the data that you want
	copper2 <- get_usgs("05590050", "2019-06-02", "2019-07-01")
	west_okaw <- get_usgs("05591700", "2018-05-01", "2018-11-01")

	west_okaw2 <- read.csv(file = "~/USGS_05591700_2018-05-01_to_2018-11-01_accessed_2021-04-09")

	## Now that you have the data you can generate the summaries that you are interested in like the mean of each parameter in the data for your survey date
	sum_one <- function(df, event_date){
	one <- df %>%
	filter(lubridate::date(datetime) == event_date) %>%
	group_by(site_no) %>%
	summarize(date = event_date,
	mean_gage = mean(gage_height_ft),
	mean_discharge = mean(discharge_cfs),
	mean_precip = mean(precip_in))
	return(one)
	}

	## Example for the same day summary
	c <- sum_one(west_okaw2,"2018-06-23")

	## You can also create a more generic function to generate summaries for any number of days before the sampling event
	#### Sum generic ####

	sum_gen <- function(df, event_date, days_before){
	gen <- df %>%
	filter(lubridate::date(datetime) < lubridate::ymd(event_date) + days(1) & lubridate::date(datetime) > lubridate::ymd(event_date) - days(1+days_before)) %>%
	group_by(site_no) %>%
	summarize(date = event_date,
	mean_gage = mean(gage_height_ft),
	mean_discharge = mean(discharge_cfs),
	mean_precip = mean(precip_in))
	return(gen)
	}

	## Examples for the same day summaries (0 days before), 3 day summary, and 7 day summary
	c0 <-sum_gen(west_okaw2, "2018-06-23", 0)
	c3 <-sum_gen(west_okaw2, "2018-06-23", 3)
	c7 <-sum_gen(west_okaw2, "2018-06-23", 7)

	## You could then create a function to read over all of the data that you need and calculate the summaries using
	## the generic function that was created above

	data_summary <- function(df, date){
	one <- sum_gen(df, date, 0)
	three <- sum_gen(df, date, 3)
	seven <- sum_gen(df, date, 7)
	dat <- one %>%
	full_join(three, by = c("site_no", "date"), suffix = c("", ".3Day")) %>%
	full_join(seven, by = c("site_no", "date"), suffix = c("", ".7Day"))
	write_csv(dat, path = paste0(df,"_usgs_gauge_summary.csv"))
	return(dat)
	}

	cs_sum <- data_summary(copper, "2018-06-23")
	wo_sum <- data_summary(west_okaw2, "2018-06-23")