Last active
October 12, 2018 17:15
—
Save troyhill/6b29802bc5250485294e5cb989bc99b9 to your computer and use it in GitHub Desktop.
An abbreviated R API for SFWMD's DBHYDRO: download station and project data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/Rscript
### functions to download DBHYDRO station and project data
# setwd("/home/thill/RDATA/dbhydro/data/20180419/")
library(httr)

# Command line arguments: order matters (right now).
# Arguments must be in order:
#   [project/station name] [working_dir] [reportType: station or project]
# NOTE(review): positional indices 6-8 assume a fixed `Rscript file.R a b c`
# invocation; confirm against the calling shell before changing.
name        <- gsub("[^[:alnum:] ]", "", commandArgs()[6])  # strip punctuation from the item name
working_dir <- sapply(strsplit(commandArgs()[7], split = "\\["), "[", 1)
reportType  <- commandArgs()[8]

# Trailing "\n" keeps each status line on its own line; the original cat()
# calls ran together on stdout.
cat("item name: ", name, "\n")
cat("working directory for R: ", working_dir, "\n")
cat("report type: ", reportType, "\n")
# setwd(args[7])
#' Download water-quality data for one or more DBHYDRO stations.
#'
#' Builds a DBHYDRO report URL and fetches it to `destfile` via httr::GET.
#'
#' @param destfile Output file name; "stn" and "todaysDate" tokens are
#'   substituted when present.
#' @param stations Character vector of station ids; "%25" acts as a wildcard.
#'   If one station in the list returns no data, the url request fails.
#' @param rename_proj Replace "stn" in `destfile` with the station name?
#' @param parameters Currently unused placeholder.
#' @param report_type "full" = long dataset (1 line per sample);
#'   "crosstab" = wide dataset.
#' @param incl_qc_flags,incl_flagged_data Logical scalars; validated below.
#' @param destination Only "file_csv" is supported.
#' @param start_date,end_date Dates in DD-MON-YYYY format; end_date "today"
#'   resolves to the current date.
#' @param import_data If TRUE, read the downloaded csv and return it.
#' @return The imported data.frame when `import_data = TRUE`; otherwise
#'   invisibly NULL (the download itself is the side effect).
dbh_stn_data <- function(destfile = "stn_report_todaysDate.csv",
                         stations = c("ACRA1"),
                         rename_proj = TRUE,
                         parameters = "all",
                         report_type = "full",
                         incl_qc_flags = TRUE,
                         incl_flagged_data = FALSE,
                         destination = "file_csv",
                         start_date = "01-JAN-1960",
                         end_date = "today",
                         import_data = FALSE) {
  if (isTRUE(rename_proj)) {
    # NOTE(review): with multiple stations the vector replacement only uses
    # its first element (with a warning) -- confirm intended behavior.
    destfile <- gsub(pattern = "stn",
                     replacement = gsub(pattern = "'", replacement = "", x = stations),
                     x = destfile)
  }
  # Validate enumerated/logical arguments up front, before any network work.
  # identical() makes the stop() branches reachable for NA input (the
  # original `x == TRUE` chains errored with an unhelpful message on NA).
  if (report_type == "full") {
    report_type_text <- "&v_report=ctr_1_true"
  } else if (report_type == "crosstab") {
    report_type_text <- "&v_report=ctr_w"
  } else {
    stop("invalid input for 'report_type': must be either 'full' or 'crosstab'")
  }
  if (identical(incl_qc_flags, TRUE)) {
    qc_flags_text <- "&v_exc_qc=N"
  } else if (identical(incl_qc_flags, FALSE)) {
    qc_flags_text <- "&v_exc_qc=Y"
  } else {
    stop("invalid input for 'incl_qc_flags': must be TRUE/FALSE")
  }
  if (identical(incl_flagged_data, FALSE)) {
    flagged_data_text <- "&v_exc_flagged=Y"
  } else if (identical(incl_flagged_data, TRUE)) {
    flagged_data_text <- "&v_exc_flagged=N"
  } else {
    stop("invalid input for 'incl_flagged_data': must be TRUE/FALSE")
  }
  if (destination != "file_csv") {
    stop("Currently only csv output is supported. Use argument 'destination = 'file_csv'")
  }
  if (end_date == "today") {
    end_date <- toupper(format(Sys.Date(), "%d-%b-%Y"))  # e.g. 12-OCT-2018
  }
  if (grepl("todaysDate", destfile)) {
    destfile <- gsub(pattern = "todaysDate",
                     replacement = format(Sys.Date(), "%Y%m%d"), x = destfile)
  }
  # any() keeps the condition scalar when `stations` has length > 1
  # (bare grepl() on a vector errors inside if()).
  if (any(grepl("%25", stations))) {
    # wild cards are used in station references, so the url must use "like"
    stationRef <- "'%20and%20station_id%20like%20('"
  } else {
    # no wild cards, so the url must use "in"
    stationRef <- "'%20and%20station_id%20in%20('"
  }
  # NOTE(review): report_type_text, qc_flags_text, flagged_data_text, and
  # end_date are constructed above but never appended to the request url --
  # almost certainly an unfinished feature. Confirm the DBHYDRO parameter
  # names before wiring them in; the url is left as originally built.
  url.init <- paste0("http://my.sfwmd.gov/dbhydroplsql/water_quality_data.report_full?v_where_clause=where%20date_collected%20%3e%20'",
                     start_date,
                     stationRef, stations, "')",
                     "&v_target_code=", destination)
  # Long explicit timeout: these reports can take minutes to generate.
  httr::GET(url.init, write_disk(destfile, overwrite = TRUE), timeout(99999))
  if (isTRUE(import_data)) {
    # Explicit return: the original only returned this invisibly via the
    # final assignment, so `x <- dbh_stn_data(...)` silently worked while
    # interactive calls appeared to return nothing.
    return(read.csv(destfile))
  }
  invisible(NULL)
}
#' Download station data for each code in `codes`, continuing past failures.
#'
#' @param codes Character vector of station identifiers.
#' @param ... Further arguments forwarded to dbh_stn_data().
#' @return Invisibly NULL; called for its file-download side effects.
dbh_stn_data_batch <- function(codes, ...) {
  for (i in seq_along(codes)) {
    tryCatch({
      print(codes[i])  # progress indicator
      dbh_stn_data(stations = codes[i], ...)
    }, error = function(e) {
      # message() accepts multiple parts; the original
      # print("Error in project ", codes[i], ...) call misused print()'s
      # positional arguments and errored inside the handler itself.
      message("Error in station ", codes[i], ": ", conditionMessage(e))
    })
  }
  invisible(NULL)
}
### dbh_stn_data() usage | |
# input_stns <- c("G300") | |
# test <- dbh_stn_data(stations = input_stns) | |
# summary(test[, 1:4]) | |
# | |
## CAMB stand-in: | |
# input_stns <- c("'G300'") # 'G300','S5A','S5AE','S5AS','S5AU','S5AUS','S5AW' | |
# test <- dbh_stn_data(stations = input_stns) | |
# | |
# input_stns <- c('G300','S5A','S5AE','S5AS','S5AU','S5AUS','S5AW') | |
# for (i in 1:length(input_stns)) { | |
# test <- dbh_stn_data(stations = input_stns[i]) | |
# } | |
# summary(test[, 1:4]) | |
# | |
# dbh_stn_data_batch(codes = input_stns) | |
# | |
# input_stns2 <- c("'ACRA%25'") # wild cards set by "%25" | |
# test2 <- dbh_stn_data(stations = input_stns2) | |
# summary(test2[, 1:4]) | |
# | |
# input_stns3 <- c("'ACRA%25','G300'") # database problem: cannot mix wild-cards and exact station names, or do a multiple-query wildcard search (all return empty .xls) | |
# test3 <- dbh_stn_data(stations = input_stns3) | |
# summary(test3[, 1:4]) | |
### | |
#' Download water-quality data for a DBHYDRO project code.
#'
#' Builds a DBHYDRO report URL and fetches it to `destfile` via httr::GET.
#'
#' @param destfile Output file name; "proj" and "todaysDate" tokens are
#'   substituted when present.
#' @param project_codes Project code string (e.g. "ACMEB").
#' @param start_date Date in DD-MON-YYYY format. Currently NOT applied to the
#'   query (see note below).
#' @param destination Only "file_csv" is known to work.
#' @param rename_proj Replace "proj" in `destfile` with the project name?
#' @param import_data If TRUE, read the downloaded csv and return it.
#' @return The imported data.frame when `import_data = TRUE`; otherwise
#'   invisibly NULL (the download itself is the side effect).
dbh_proj_data <- function(destfile = "proj_report_todaysDate.csv",
                          project_codes = "ACMEB",
                          start_date = "01-JAN-1960",
                          destination = "file_csv",
                          rename_proj = TRUE,
                          import_data = FALSE) {
  # Stamp today's date into the default file name.
  if (grepl("todaysDate", destfile)) {
    destfile <- gsub(pattern = "todaysDate",
                     replacement = format(Sys.Date(), "%Y%m%d"), x = destfile)
  }
  if (isTRUE(rename_proj)) {
    destfile <- gsub(pattern = "proj",
                     replacement = gsub(pattern = "'", replacement = "", x = project_codes),
                     x = destfile)
  }
  # NOTE(review): the original branched on a "%25" wildcard here, but both
  # branches produced the same "project_code='" fragment, so wildcard queries
  # are sent as exact matches (and return empty results). A "like" clause --
  # as in dbh_stn_data() -- is probably intended; confirm against the DBHYDRO
  # API before changing. Collapsing the dead branch also avoids grepl()
  # erroring inside if() for vector input.
  projectRef <- "project_code='"
  # start_date is accepted but not yet wired into the where-clause; the
  # commented fragment below was never finished.
  url.init <- paste0("http://my.sfwmd.gov/dbhydroplsql/water_quality_data.report_full?v_where_clause=where%20",
                     # "date_collected%20%3e%20='", start_date,
                     projectRef, project_codes, "'",
                     "&v_target_code=", destination)
  # download.file() hit timeouts even with huge options(timeout=...), and
  # RCurl::getURL() crashed RStudio; httr::GET with a long timeout works.
  httr::GET(url.init, write_disk(destfile, overwrite = TRUE), timeout(99999))
  if (isTRUE(import_data)) {
    # Explicit return; the original only returned this invisibly via the
    # final assignment.
    return(read.csv(destfile))
  }
  invisible(NULL)
}
# dbh_proj_data(project_codes = "CAMB") | |
#' Download project data for each code in `codes`, continuing past failures.
#'
#' @param codes Character vector of project codes.
#' @param ... Further arguments forwarded to dbh_proj_data().
#' @return Invisibly NULL; called for its file-download side effects.
dbh_proj_data_batch <- function(codes, ...) {
  for (i in seq_along(codes)) {
    tryCatch({
      print(codes[i])  # progress indicator
      dbh_proj_data(project_codes = codes[i], ...)
    }, error = function(e) {
      # message() accepts multiple parts; the original
      # print("Error in project ", codes[i], ...) call misused print()'s
      # positional arguments and errored inside the handler itself.
      message("Error in project ", codes[i], ": ", conditionMessage(e))
    })
  }
  invisible(NULL)
}
### dbh_proj_data_batch() usage | |
# STA1E_project_codes <- c("ENRR", 'ST1E', 'ST1W', 'CAMB', "EAAP", "ST1EM", | |
# "ST1EG", "RAIN", "X", "TM", "PEST", "EVPA", "HGLE", | |
# "ST1M", "ST1G", "LAB", "L8RT" # , "ST1F" appears to be fish data | |
# ) | |
# STA1W_project_codes <- c("ENRM", "ENRP", "ENRU") # duplicates that overlap with STA1E: "ENRR", "LAB", "CAMB", "ST1E", "ST1W", "ST1G", "ST1M", "ST1F", "ST1EM") | |
# STA2_project_codes <- c("STA2", "ST2M", "ST2G", "EAA", "USGS", "HGOS") # duplicates: c("CAMB", "EAAP", "EVPA", "PEST", "RAIN", "X") "ST2F" appears to be fish data | |
# STA34_project_codes <- c("ST34", "ST34G", "ST34M", "A1FEB", "HOLY") # duplicates: c("CAMB", "EAA", "EVPA", "HGOS", "PEST", "RAIN", "USGS", "X") "ST34F" appears to be fish data | |
# STA56_project_codes <- c("ST5R", "STA5", "STA6", "ST6M", "SEMI", "RTBG", "ST5M") # duplicates: c("PEST", "CAMB") "ST6F" appears to be fish data | |
# | |
# dbh_proj_data_batch(codes = STA1E_project_codes) | |
# dbh_proj_data_batch(codes = STA1W_project_codes) | |
# dbh_proj_data_batch(codes = STA2_project_codes) | |
# dbh_proj_data_batch(codes = STA34_project_codes) | |
# dbh_proj_data_batch(codes = STA56_project_codes) | |
### | |
# Dispatch on the requested report type. Unrecognized values simply fall
# through with no action; the output file is written under working_dir with
# a "todaysDate" token that the batch helpers expand.
if (identical(reportType, "project")) {
  dbh_proj_data_batch(codes = c(name),
                      destfile = paste0(working_dir, "/", "proj_report_todaysDate.csv"))
}
if (identical(reportType, "station")) {
  dbh_stn_data_batch(codes = c(name),
                     destfile = paste0(working_dir, "/", "stn_report_todaysDate.csv"))
}
###
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment