Last active
August 8, 2020 22:37
-
-
Save mbacou/41c2fa42fa36cef2a19d9291432b8560 to your computer and use it in GitHub Desktop.
Repex for OpenCPU failed child process error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Repex - download 1200+ raster layers from USGS | |
#' @import data.table stringr | |
#' @importFrom tools file_path_sans_ext | |
#' @export | |
step1 = function( | |
x = c("2001-01-01", "2004-12-31"), | |
dir = tempdir() | |
) { | |
if(!dir.exists(dir)) dir.create(dir) | |
x = as.Date(x) | |
# Build daily time series | |
x = seq.Date(x[1], x[2], "day") | |
# Raster files from USGS server | |
cat = list( | |
proj = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0", | |
file = function(x) sub("-0*", "", format(x, "rain_%Y-%j.bil")), | |
archive = function(x) sub("-0*", "", format(x, "rain_%Y-%j.tar.gz")), | |
url = "https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/fews/web/africa/daily/rfe/downloads/daily/" | |
) | |
# Build target file names | |
dt = data.table( | |
code = "rfe", | |
date = x, | |
file = eval(cat[["file"]])(x), | |
archive = eval(cat[["archive"]])(x), | |
url = cat$url | |
) | |
# Add absolute download paths and file paths | |
dt[, dest := file.path(dir, tolower(code), archive) | |
][, file := file.path(dir, tolower(code), file)] | |
dir = dt[, unique(dirname(dest))] | |
for(i in dir) if(!dir.exists(i)) dir.create(i, recursive=TRUE) | |
dt[, status := file.exists(file)] | |
tmp = dt[!(status)] | |
# Download from remote | |
if(nrow(tmp) > 0) { | |
message("1) Attempting to download " , nrow(tmp), " archives from ", tmp[1, url], "... ") | |
for(i in seq(1, nrow(tmp), by=50)) { | |
N = min(i+50-1, nrow(tmp)) | |
cat(i, "to", N, "\n") | |
res = try(download.file( | |
tmp[i:N, file.path(url, archive)], | |
tmp[i:N, file.path(dest)], method="libcurl", quiet=TRUE)) | |
if(class(res)[1]=="try-error") warning( | |
"Failed to download any file in this batch") | |
} | |
} else message("1) All caught up, nothing to download.") | |
# Tally | |
dt[, status := file.exists(dest)] | |
if(dt[!(status), .N] > 0) warning( | |
"Failed to download ", dt[!(status), .N], " rasters") | |
if(dt[(status), .N]==0) warning( | |
"No layer was downloaded") | |
# Deflate all archives | |
message("2) Deflating ", dt[(status), uniqueN(dest)], " archives into ", | |
dt[(status), paste(unique(dirname(file)), collapse="\n")]) | |
method = if(!is.na(Sys.getenv("TAR"))) Sys.getenv("TAR") else "internal" | |
for(f in dt[file.exists(dest) & str_sub(dest, -7)==".tar.gz", unique(dest)]) | |
untar(f, exdir=dirname(f), extras="--skip-old-files", tar=method) | |
# Warn if any day over the period is missing from the catalog | |
dt[, status := file.exists(file)] | |
failed = setdiff(dt[(status), date], x) | |
if(length(failed) > 0) warning( | |
"There are ", failed, " missing ", dt[1, time], | |
" data layers over the requested period. Please investigate.") | |
dt[, `:=`( | |
# Make unique layer names | |
layer = make.names(file_path_sans_ext(basename(file)), unique=TRUE), | |
# Clean up | |
archive = NULL, | |
dest = NULL, | |
url = NULL | |
)] | |
return(dt) | |
} | |
#' Repex: spatial point extraction | |
#' @inheritDotParams step1 | |
#' @import parallel data.table | |
#' @importFrom raster crs crs<- extract stack removeTmpFiles rasterOptions | |
#' @examples | |
#' dir <- tempdir() | |
#' dt <- step2(dir=dir) | |
#' | |
#' @export | |
step2 = function( | |
catalog = NULL, | |
pts = c(X=-0.74, Y=9.37), | |
mc.cores = getOption("mc.cores", 3L), | |
...) { | |
catalog = if(missing(catalog)) step1(...) else catalog | |
pts = data.table(id=1, X=pts["X"], Y=pts["Y"]) | |
y = catalog[, seq(min(year(date)), max(year(date)))] | |
# extract() does not multithread with point queries, so force it with mclapply() | |
mc.cores = if(mc.cores > 1 && length(y) > 1) mc.cores else 1L | |
tmp = mclapply(y, | |
mc.cores=mc.cores, mc.silent=FALSE, mc.allow.recursive=FALSE, mc.cleanup=TRUE, | |
function(x) { | |
#rasterOptions(timer=FALSE, chunksize=2e+08, datatype=switch(code, rfe="INT1U", "FLT4S")) | |
r = stack(as.list(catalog[year(date)==x, file]), quick=TRUE, native=TRUE) | |
names(r) = catalog[year(date)==x, layer] | |
res = extract(r, pts[, .(X, Y)]) | |
return(res) | |
}) | |
# Collect all years | |
dt = lapply(tmp, function(x) { | |
res = data.table(x)[, id := pts[, id]] | |
res = melt(res, id.vars="id", variable.name="layer", value.name="value") | |
res[catalog, on=.(layer), date := date][, layer := NULL] | |
}) | |
dt = rbindlist(dt) | |
return(dt) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment