Skip to content

Instantly share code, notes, and snippets.

@troyhill
Last active October 12, 2018 17:15
Show Gist options
  • Save troyhill/6b29802bc5250485294e5cb989bc99b9 to your computer and use it in GitHub Desktop.
Save troyhill/6b29802bc5250485294e5cb989bc99b9 to your computer and use it in GitHub Desktop.
An abbreviated R API for SFWMD's DBHYDRO: download station and project data
#!/usr/bin/Rscript
### functions to download DBHYDRO station and project data
# setwd("/home/thill/RDATA/dbhydro/data/20180419/")
library(httr)
# command line arguments: order matters (right now)
# arguments must be in order: [project/station name] [working_dir] [reportType: station or project]
name <- gsub("[^[:alnum:] ]", "", commandArgs()[6])
working_dir <- sapply(strsplit(commandArgs()[7], split = "\\["), "[", 1)
reportType <- commandArgs()[8]
cat("item name: ", name)
cat("working directory for R: ", working_dir)
cat("report type: ", reportType)
# setwd(args[7])
dbh_stn_data <- function(destfile = "stn_report_todaysDate.csv",
stations = c("ACRA1"), # If one station in the list returns no data, the url request fails.
rename_proj = TRUE,
parameters = "all",
report_type = "full", # full = long dataset - 1 line per sample; crosstab = wide dataset
incl_qc_flags = TRUE,
incl_flagged_data = FALSE,
destination = "file_csv",
start_date = "01-JAN-1960", # note format
end_date = "today",
import_data = FALSE) {
if (rename_proj == TRUE) {
destfile <- gsub(pattern = "stn", replacement = gsub(pattern = "'", replace = "", x = stations), x = destfile)
}
if (report_type == "full") {
report_type_text <- paste0("&v_report=ctr_1_true")
} else if (report_type == "crosstab") {
report_type_text <- paste0("&v_report=ctr_w")
} else stop("invalid input for 'report_type': must be 'either 'full' or 'crosstab'")
if (incl_qc_flags == TRUE) {
qc_flags_text <- paste0("&v_exc_qc=N")
} else if (incl_qc_flags == FALSE) {
qc_flags_text <- paste0("&v_exc_qc=Y")
} else stop("invalid input for 'incl_qc_flags': must be TRUE/FALSE")
if (incl_flagged_data == FALSE) {
flagged_data_text <- paste0("&v_exc_flagged=Y")
} else if (incl_flagged_data == TRUE) {
flagged_data_text <- paste0("&v_exc_flagged=N")
} else stop("invalid input for 'incl_flagged_data': must be TRUE/FALSE")
if (!destination == "file_csv") {
stop("Currently only csv output is supported. Use argument 'destination = 'file_csv'")
}
if (end_date == "today") {
end_date <- toupper(format(Sys.Date(), "%d-%b-%Y"))
}
if (grepl("todaysDate", destfile)) {
destfile <- gsub(pattern = "todaysDate", replacement = format(Sys.Date(), "%Y%m%d"), x = destfile)
}
if (grepl("%25", stations)) { # if wild cards are used in station references, url must use "like"
stationRef <- "'%20and%20station_id%20like%20('"
} else { # if no wild cards are used, url must use "in"
stationRef <- "'%20and%20station_id%20in%20('"
}
url.init <- paste0("http://my.sfwmd.gov/dbhydroplsql/water_quality_data.report_full?v_where_clause=where%20date_collected%20%3e%20'",
start_date,
stationRef, stations, "')",
"&v_target_code=", destination
)
httr::GET(url.init, write_disk(destfile, overwrite = TRUE), timeout(99999))
if (import_data == TRUE) {
output <- read.csv(destfile)
}
}
dbh_stn_data_batch <- function(codes, ...) {
for (i in 1:length(codes)) {
tryCatch({
print(codes[i])
dbh_stn_data(stations = codes[i], ...)
}, error = function(e) {print("Error in project ", codes[i], ": ", conditionMessage(e), "\n", quote = FALSE)})
}
}
### dbh_stn_data() usage
# input_stns <- c("G300")
# test <- dbh_stn_data(stations = input_stns)
# summary(test[, 1:4])
#
## CAMB stand-in:
# input_stns <- c("'G300'") # 'G300','S5A','S5AE','S5AS','S5AU','S5AUS','S5AW'
# test <- dbh_stn_data(stations = input_stns)
#
# input_stns <- c('G300','S5A','S5AE','S5AS','S5AU','S5AUS','S5AW')
# for (i in 1:length(input_stns)) {
# test <- dbh_stn_data(stations = input_stns[i])
# }
# summary(test[, 1:4])
#
# dbh_stn_data_batch(codes = input_stns)
#
# input_stns2 <- c("'ACRA%25'") # wild cards set by "%25"
# test2 <- dbh_stn_data(stations = input_stns2)
# summary(test2[, 1:4])
#
# input_stns3 <- c("'ACRA%25','G300'") # database problem: cannot mix wild-cards and exact station names, or do a multiple-query wildcard search (all return empty .xls)
# test3 <- dbh_stn_data(stations = input_stns3)
# summary(test3[, 1:4])
###
dbh_proj_data <- function(destfile = "proj_report_todaysDate.csv",
project_codes = "ACMEB",
start_date = "01-JAN-1960", # note format
destination = "file_csv",
rename_proj = TRUE, # should destination file be re-named to replace "proj" with project name
import_data = FALSE
) {
if (grepl("todaysDate", destfile)) {
destfile <- gsub(pattern = "todaysDate", replacement = format(Sys.Date(), "%Y%m%d"), x = destfile)
}
if (rename_proj == TRUE) {
destfile <- gsub(pattern = "proj", replacement = gsub(pattern = "'", replace = "", x = project_codes), x = destfile)
}
if (grepl("%25", project_codes)) {
projectRef <- "project_code='"
} else {
projectRef <- "project_code='"
}
url.init <- paste0("http://my.sfwmd.gov/dbhydroplsql/water_quality_data.report_full?v_where_clause=where%20",
#"date_collected%20%3e%20='"start_date,
projectRef, project_codes, "'",
"&v_target_code=", destination
)
#download.file(url = url.init, destfile) # timeout problems even after setting options(timeout=24000000000000000000000000000000000000000)
#RCurl::getURL(url = url.init) # crashed RStudio
httr::GET(url.init, write_disk(destfile, overwrite = TRUE), timeout(99999))
if (import_data == TRUE) {
output <- read.csv(destfile)
}
}
# dbh_proj_data(project_codes = "CAMB")
dbh_proj_data_batch <- function(codes, ...) {
for (i in 1:length(codes)) {
tryCatch({
print(codes[i])
dbh_proj_data(project_codes = codes[i], ...)
}, error = function(e) {print("Error in project ", codes[i], ": ", conditionMessage(e), "\n", quote = FALSE)})
}
}
### dbh_proj_data_batch() usage
# STA1E_project_codes <- c("ENRR", 'ST1E', 'ST1W', 'CAMB', "EAAP", "ST1EM",
# "ST1EG", "RAIN", "X", "TM", "PEST", "EVPA", "HGLE",
# "ST1M", "ST1G", "LAB", "L8RT" # , "ST1F" appears to be fish data
# )
# STA1W_project_codes <- c("ENRM", "ENRP", "ENRU") # duplicates that overlap with STA1E: "ENRR", "LAB", "CAMB", "ST1E", "ST1W", "ST1G", "ST1M", "ST1F", "ST1EM")
# STA2_project_codes <- c("STA2", "ST2M", "ST2G", "EAA", "USGS", "HGOS") # duplicates: c("CAMB", "EAAP", "EVPA", "PEST", "RAIN", "X") "ST2F" appears to be fish data
# STA34_project_codes <- c("ST34", "ST34G", "ST34M", "A1FEB", "HOLY") # duplicates: c("CAMB", "EAA", "EVPA", "HGOS", "PEST", "RAIN", "USGS", "X") "ST34F" appears to be fish data
# STA56_project_codes <- c("ST5R", "STA5", "STA6", "ST6M", "SEMI", "RTBG", "ST5M") # duplicates: c("PEST", "CAMB") "ST6F" appears to be fish data
#
# dbh_proj_data_batch(codes = STA1E_project_codes)
# dbh_proj_data_batch(codes = STA1W_project_codes)
# dbh_proj_data_batch(codes = STA2_project_codes)
# dbh_proj_data_batch(codes = STA34_project_codes)
# dbh_proj_data_batch(codes = STA56_project_codes)
###
if (reportType %in% "project") {
dbh_proj_data_batch(destfile = paste0(working_dir,"/", "proj_report_todaysDate.csv"), codes = c(name))
}
if (reportType %in% "station") {
dbh_stn_data_batch(destfile = paste0(working_dir,"/", "stn_report_todaysDate.csv"), codes = c(name))
}
###
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment