Last active
June 4, 2019 14:12
-
-
Save troyhill/bada3b6200bb73f7ccf5447f399b9cd9 to your computer and use it in GitHub Desktop.
fireHydro crontab 20190221
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### may be necessary: install dev version of curl for google drive | |
# install.packages("https://github.com/jeroenooms/curl/archive/master.tar.gz", repos = NULL) | |
pkgs.used <- c("devtools", "sf", "ggplot2", "gmailr", "googledrive") | |
pkgs.to.install <- pkgs.used[!pkgs.used %in% installed.packages()] | |
if (length(pkgs.to.install) > 0) { | |
install.packages(pkgs.to.install) | |
} | |
if (!"fireHydro" %in% installed.packages()) { | |
devtools::install_github("troyhill/fireHydro") | |
} | |
library(sf) | |
library(ggplot2) | |
library(gmailr) | |
library(googledrive) | |
library(fireHydro) | |
start.time <- Sys.time() | |
# setwd("/home/thill") | |
### output file location can be specified using a command-line argument starting with "outputFolder_" | |
### e.g., Rscript /home/thill/RDATA/test_cron_20181219.R --outputFolder_/opt/physical/troy/cron_output >output 2>&1 | |
inputArg <- grep(x = commandArgs(), pattern = "--outputFolder_", value = TRUE) | |
outputFolder <- gsub(x = inputArg, pattern = "--outputFolder_", replacement = "") | |
if (length(outputFolder) == 0) { | |
### if no output directory is specified, use this one: | |
outputFolder <- "/home/thill/RDATA" | |
} | |
### two approaches: | |
### get today's map (if available): | |
todaysDate <- gsub(x = Sys.Date(), pattern = "-", replacement = "") | |
yr <- substr(todaysDate, 1, 4) | |
### find most recent EDEN data | |
targetFile <- tail(list.files(paste0("/opt/physical/gis/eden/", yr)),1) | |
fileNameOnly <- substr(targetFile, 1, nchar(targetFile) - 4) | |
fileDateOnly <- substr(fileNameOnly, 9, nchar(fileNameOnly)) | |
fileDate2 <- format(x = strptime(x = as.character(fileDateOnly), format = "%Y%m%d"), "%d-%b-%Y") | |
# a <- sf::st_read(paste0("/opt/physical/gis/eden/", yr, "/", fileNameOnly, ".shp")) | |
### see if most recent EDEN data has already been processed: | |
### this line is useful if a shared drive is being used: | |
outputFolderFiles <- list.files(paste0(outputFolder, "/shp")) | |
outputFolderFiles.full <- list.files(paste0(outputFolder, "/shp"), full.names = TRUE) | |
processedDates <- gsub(x = outputFolderFiles, pattern = "fireHydro_|\\.prj|\\.shp|\\.shx|\\.dbf|\\.gpkg", replacement = "") | |
numericProcessedDates <- sort(as.numeric(unique(gsub(x = outputFolderFiles, pattern = "^[^_]*_|\\.[^_]*$", replacement = ""))), decreasing = TRUE) | |
### if google drive is being used, check there instead: 20190301: issues with tokens preclude sending email after running this line | |
# files <- googledrive::drive_find(pattern = "fireHydro|FireSpreadRisk|WaterLevels", verbose = FALSE) # "\\.shp|\\.prj|\\.shx|\\.dbf|\\.pdf") | |
# processedDates <- sort(as.numeric(unique(gsub(x = files$name, pattern = "^[^_]*_|\\.[^_]*$", replacement = ""))), decreasing = TRUE) | |
if (!fileDateOnly %in% processedDates) { # if dates are not the same, run fireHydro and email output | |
### make pdfs and shp for folder | |
fireHydroFilename <- paste0("fireHydro_", fileDateOnly, ".gpkg") | |
fireHydro_shapefile <- paste0(outputFolder, "/shp/", fireHydroFilename) | |
waterLevelPdf <- paste0(outputFolder, "/pdf/WaterLevels_", fileDateOnly, ".pdf") | |
waterLevelPng <- paste0(outputFolder, "/pdf/WaterLevels_", fileDateOnly, ".png") | |
fireSpreadPdf <- paste0(outputFolder, "/pdf/FireSpreadRisk_", fileDateOnly, ".pdf") | |
fireSpreadPng <- paste0(outputFolder, "/pdf/FireSpreadRisk_", fileDateOnly, ".png") | |
suppressWarnings( | |
tmp.shp <- fireHydro::getFireHydro(EDEN_date = fileDateOnly, | |
output_shapefile = NULL, #fireHydro_shapefile, | |
waterLevelExport = c(waterLevelPdf, waterLevelPng), | |
returnShp = TRUE, | |
fireSpreadExport = c(fireSpreadPdf, fireSpreadPng), | |
figureWidth = 6.5, figureHeight = 4, ggBaseSize = 12, | |
burnHist = TRUE) | |
) | |
sf::st_write(obj = tmp.shp, dsn = "temp.gpkg", delete_layer = TRUE, update = FALSE, delete_dsn = TRUE) | |
# rename file - there seems to be some constraint on filenames in sf::st_write | |
# and copy to physical drive. TODO: add physical drive to trash collection | |
file.rename("temp.gpkg", fireHydroFilename) | |
file.copy(fireHydroFilename, fireHydro_shapefile) | |
### make pngs for emailing - 20190301 - fireHydro v0.0.5 rendered this second command obsolete. | |
# fireHydro::getFireHydro(EDEN_date = fileDateOnly, | |
# output_shapefile = NULL, | |
# waterLevelExport = paste0(outputFolder, "/pdf/WaterLevels_", fileDateOnly, ".png"), | |
# fireSpreadExport = paste0(outputFolder, "/pdf/FireSpreadRisk_", fileDateOnly, ".png"), | |
# figureWidth = 6.5, figureHeight = 4, ggBaseSize = 12) | |
# sf::st_write(obj = BICY_EVER_PlanningUnits, "deleteMeNow.shp", | |
# delete_layer = TRUE, driver = "ESRI Shapefile") | |
# email output ------------------------------------------------------------ | |
# Store html body as a variable per https://stackoverflow.com/questions/40761778/gmailr-attachement-wont-allow-body-text-to-be-displayed | |
body <- paste0("Attached are updated maps showing water levels and estimated fire spread risk for ", fileDate2, ". Shapefile and pdf versions of these maps are available on google drive: https://drive.google.com/open?id=1RdVqevubJf8QSIBlLITeHZeWKf_zu5yL. | |
This is an automated email sent when new EDEN water level data are available.") | |
mime() %>% | |
to(c("[email protected]", "[email protected]", | |
"[email protected]", "[email protected]", | |
"[email protected]", "[email protected]", | |
"[email protected]", | |
"[email protected]")) %>% | |
bcc("[email protected]") %>% | |
from("[email protected]") %>% | |
text_body(body = "Lorem ipsum") -> text_msg | |
### use during testing: | |
# mime() %>% | |
# to(c("[email protected]")) %>% | |
# from("[email protected]") %>% | |
# text_body(body = "lorem ipsum") -> text_msg | |
### | |
### linux | |
text_msg %>% | |
subject(paste0("Fire-hydro output: ", fileDate2)) %>% | |
html_body(body)%>% | |
attach_part(body) %>% | |
attach_file(waterLevelPng) %>% attach_file(fireSpreadPng) -> file_attachment | |
# tryMessage <- function(emailMessage, maxAttempts = 5) { | |
# r <- NULL | |
# attempt <- 0 | |
# while( is.null(r) && attempt <= maxAttempts ) { | |
# attempt <- attempt + 1 | |
# try( | |
# r <- gmailr::send_message(emailMessage) | |
# return(1) | |
# ) | |
# } | |
# } | |
# | |
# tryMessage(emailMessage = file_attachment) | |
send_message(file_attachment) | |
# Upload files to google drive -------------------------------------------------- | |
drive_upload_mod( | |
mediaInput = waterLevelPdf, | |
pathInput = "FireHydro output/pdf/") | |
drive_upload_mod( | |
mediaInput = fireSpreadPdf, | |
pathInput = "FireHydro output/pdf/") | |
drive_upload_mod( | |
mediaInput = fireHydroFilename, | |
pathInput = "FireHydro output/shp/") | |
if (grepl(x = fireHydroFilename, pattern = "\\.shp")) { | |
# if a .shp is produced, upload all associated files. | |
drive_upload_mod( | |
mediaInput = gsub(x = fireHydroFilename, pattern = "\\.shp", replacement = "\\.shx"), | |
pathInput = "FireHydro output/shp/") | |
drive_upload_mod( | |
mediaInput = gsub(x = fireHydroFilename, pattern = "\\.shp", replacement = "\\.prj"), | |
pathInput = "FireHydro output/shp/") | |
drive_upload_mod( | |
mediaInput = gsub(x = fireHydroFilename, pattern = "\\.shp", replacement = "\\.dbf"), | |
pathInput = "FireHydro output/shp/") | |
} | |
# Clean up google drive --------------------------------------------------- | |
files <- googledrive::drive_find(pattern = "fireHydro|FireSpreadRisk|WaterLevels", verbose = FALSE) # "\\.shp|\\.prj|\\.shx|\\.dbf|\\.pdf") | |
processedDates_goog <- sort(as.numeric(unique(gsub(x = files$name, pattern = "^[^_]*_|\\.[^_]*$", replacement = ""))), decreasing = TRUE) | |
if (length(processedDates_goog) > 3) { # select the most recent three entries, if more than three entries are present | |
processedDates_goog <- processedDates_goog[1:3] | |
} | |
# dates to preserve | |
targetPattern <- unlist(paste(processedDates_goog, collapse="|")) | |
# identify ids containing these dates and "fireHydro" | |
targetIDs <- grep(x = files$name, pattern = targetPattern, value = TRUE, invert = TRUE) | |
toBeRemoved <- files[files$name %in% targetIDs, ] # necessary bc googledrive works with tibbles | |
if (length(targetIDs) > 0) { | |
googledrive::drive_rm(toBeRemoved) | |
} | |
# Clean up local files ---------------------------------------------------- | |
# identify most recent three dates after new files have been created | |
outputFolderFiles_end <- list.files(paste0(outputFolder, "/shp"), full.names = TRUE) | |
numericProcessedDates_end <- sort(as.numeric(unique(gsub(x = outputFolderFiles_end, pattern = "^[^_]*_|\\.[^_]*$", replacement = ""))), decreasing = TRUE) | |
if (length(numericProcessedDates_end) > 3) { # reduce to the most recent three entries, if more than three entries are present | |
numericProcessedDates_end <- numericProcessedDates_end[1:3] | |
} | |
# Remove all shapefiles except the most recent three dates | |
targetPattern_local <- unlist(paste(numericProcessedDates_end, collapse="|")) | |
# identify ids containing these dates and "fireHydro" | |
targetIDs_local <- grep(x = outputFolderFiles_end, pattern = targetPattern_local, value = TRUE, invert = TRUE) | |
# toBeRemoved_local <- outputFolderFiles.full[outputFolderFiles.full %in% targetIDs_local] # this seems strictly duplicative. | |
if (length(targetIDs_local) > 0) { | |
file.remove(targetIDs_local) | |
cat("\n removed file(s):\n", paste0(targetIDs_local, "\n"), "\n") | |
} | |
# Remove all pdfs/pngs except the most recent three dates | |
outputFolderFiles.png <- list.files(paste0(outputFolder, "/pdf"), full.names = TRUE) | |
# identify ids containing these dates and "fireHydro" | |
targetIDs_png <- grep(x = outputFolderFiles.png, pattern = targetPattern_local, value = TRUE, invert = TRUE) | |
# toBeRemoved_png <- outputFolderFiles.png[outputFolderFiles.png %in% targetIDs_png] # this seems strictly duplicative. | |
if (length(targetIDs_png) > 0) { | |
file.remove(targetIDs_png) | |
cat("\n removed file(s):\n", paste0(targetIDs_png, "\n"), "\n") | |
cat("\n\n DEBUGGING pdf removal \n\n", | |
"length(targetIDs_png) = ", length(targetIDs_png), "\n", | |
"grep(x = outputFolderFiles.png, pattern = targetPattern_local, value = TRUE, invert = TRUE) yields ", | |
grep(x = outputFolderFiles.png, pattern = targetPattern_local, value = TRUE, invert = TRUE), | |
"\n grep(x = outputFolderFiles.png, pattern = targetPattern_local, value = TRUE, invert = FALSE) yields ", | |
grep(x = outputFolderFiles.png, pattern = targetPattern_local, value = TRUE, invert = FALSE), | |
"\n where outputFolderFiles.png = ", | |
outputFolderFiles.png, | |
"\n and targetPattern_local = ", | |
targetPattern_local, | |
"\n\n") | |
} | |
### now clean up files created by this script | |
if (file.exists(fireHydroFilename)) { | |
# Delete file if it exists | |
file.remove(fireHydroFilename) | |
cat("removed file: ", fireHydroFilename) | |
} | |
} | |
timeChange <- round(difftime(Sys.time(), start.time, units = "mins"), 1) | |
cat("\n", "script run time: ", timeChange, " mins", "\n\n") | |
# Notify me of successful script completion even if no new product -------- | |
if (timeChange < 3) { | |
body <- paste0("fire-hydro script ran successfully in ", | |
timeChange, " minutes, with no new output. The most recent EDEN data is from ", fileDate2, " | |
output: | |
", | |
readChar("/home/thill/output", file.info("/home/thill/output")$size) | |
) | |
mime() %>% | |
to("[email protected]") %>% | |
from("[email protected]") %>% | |
text_body(body = "lorem ipsum") -> text_msg | |
### linux | |
text_msg %>% | |
subject(paste0("Fire-hydro script completion")) %>% | |
html_body(body) %>% | |
attach_part(body) -> msg | |
send_message(msg) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment