groundwater database analysis scripts
Gist floybix/518612 (created August 11, 2010)
## groundwater database analysis scripts
## Felix Andrews <[email protected]> 2010-08-01

library(RODBC)
library(zoo)
library(lattice)
library(latticeExtra)
library(sp)
library(rgdal)
library(mgcv)
library(plyr) ## for progress bars
library(maps) ## for world.cities

## version checks (packageVersion() compares version numbers, not strings)
stopifnot(packageVersion("zoo") >= "1.7")
stopifnot(packageVersion("lattice") >= "0.19")
stopifnot(packageVersion("latticeExtra") >= "0.6-14")

lattice.options(default.theme = custom.theme.2())

## use the Windows progress bar; otherwise change this to progress_text or progress_tk
myprogress <- function(title = "Progress") progress_win(title)
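## A hedged alternative for non-Windows systems: plyr's text progress bar.
## progress_text() takes no title, so the 'title' argument is simply ignored.
# myprogress <- function(title = "Progress") progress_text()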
setwd("X:/Projects/BOMGW")
conn <- odbcConnectAccess("MdbCsv/water.mdb")
sqlTables(conn)
locs <- sqlFetch(conn, "Locations")
locs$DrilledDpth <- as.numeric(gsub(",", "", locs$DrilledDpth))

gmus <- readOGR("agmu_r9nnd_00311a01e_sgeo___", "gmushape")
rownames(gmus@data) <- make.unique(as.character(gmus@data$GMU_NAME))

#rivers <- readOGR("river_reaches/arc_reach_shape", "arc_reach")
#rivers <- rivers[,c("STATE", "BASIN", "A", "M")]
#save(rivers, file = "rivers.Rdata")
load("rivers.Rdata")

data(world.cities)
cities.all <- subset(world.cities, country.etc == "Australia")
## Australian cities that are either large or explicitly listed
## (note the parentheses: & binds more tightly than |)
cities.ok <- subset(world.cities, country.etc == "Australia" &
                    (pop >= 50000 |
                     name %in% c("Darwin", "Alice Springs", "Kalgoolie", "Broome", "Walgett",
                                 "Mount Isa", "Esperance", "Broken Hill", "Coober Pedy",
                                 "Bendigo", "Ballarat", "Sale", "Mildura"))) # "Echuca-Moama"

source("01_script.R")
source("02_timeseries.R")
source("03_plot_all_sites.R")
source("04_plot_distributions.R")
source("05_plot_spatial.R")

## now you are ready to run "0_nsw.R" etc.
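## When those scripts are finished with the database, the ODBC connection can
## be released with RODBC's odbcClose():
# odbcClose(conn)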
## ---- next gist file (presumably "01_script.R", per the source() calls above) ----
## groundwater database analysis scripts
## Felix Andrews <[email protected]> 2010-08-01

do_groundwater_analysis <-
    function(infolist, state = "VIC", ident = state,
             refyears = c(1985, 1990, 1995), now = 2007,
             gmus.state = gmus[gmus$STATE == state,],
             rivers.state = rivers[rivers$STATE == state,],
             do.what = c("state", "gmus"))
{
    if (!file.exists(ident)) dir.create(ident)
    message("generating distribution plots")
    png(sprintf("%s/%s_distributions_%%01d.png", ident, ident),
        width = 3000, height = 1500, res = 300)
    print(plot_distributions(infolist, ident = ident, now = now))
    dev.off()
    ## write out differences from current as shapefile (points)
    message("writing points to shapefile")
    deltas <- as.data.frame(t(infolist$deltas))
    deltas$stationID <- rownames(deltas)
    pt.deltas <- merge(locs, deltas)
    pt.deltas <- subset(pt.deltas, Lng > 120)
    ## need to keep column names to at most 10 characters for writeOGR
    colnames(pt.deltas) <- substring(colnames(pt.deltas), first = 1, last = 10)
    pt.deltas <- SpatialPointsDataFrame(coords = pt.deltas[,c("Lng","Lat")],
                                        data = pt.deltas)
    writeOGR(pt.deltas,
             sprintf("%s/%s_gw_trends", ident, ident),
             "sites_deltas",
             driver = "ESRI Shapefile")
    if ("state" %in% do.what) {
        for (refyear in refyears) {
            message("aggregating to GMUs for reference year ", refyear)
            ## calculate quantiles of changes over the set of bores in each GMU
            qqtmp <-
                overlayEach(pt.deltas[,toString(refyear)], gmus.state,
                            fn = function(x) {
                                qq <- quantile(x[,1], c(0.5, 0.05, 0.01), na.rm = TRUE)
                                names(qq) <- c("median", "worst_5pc", "worst_1pc") ## max 10 chars!
                                qq[["n_values"]] <- sum(!is.na(x[,1]))
                                qq ## return the full named vector, not just the count
                            },
                            out.n = 4)
            ii.na <- is.na(qqtmp[,"n_values"])
            qqtmp[ii.na, "n_values"] <- 0
            ## write out aggregated stats as shapefile (polygons)
            gmus.ref <- gmus.state
            gmus.ref@data <- cbind(qqtmp, gmus.ref@data)
            writeOGR(gmus.ref,
                     sprintf("%s/%s_gw_trends", ident, ident),
                     sprintf("gmus_%i", refyear),
                     driver = "ESRI Shapefile")
            pt.deltas.ref <- pt.deltas[,c(toString(refyear), "stationID", "longName")]
            colnames(pt.deltas.ref@data)[1] <- "value"
            ## generate maps (quantiles of changes in GMUs)
            foo <-
                gmuLevelPlot(gmus.ref[,1:4], pt.deltas.ref, refyear = refyear,
                             xlab.top = ident)
            ##pdf(sprintf("%s/%s_deltas%i_gmus.pdf", ident, ident, refyear),
            ##    paper = "a4r", width = 18, height = 10)
            ##print(foo)
            ##dev.off()
            png(sprintf("%s/%s_deltas%i_gmus.png", ident, ident, refyear),
                width = 6000, height = 3000, res = 300)
            print(foo)
            dev.off()
        }
    }
    if ("gmus" %in% do.what) {
        ## generate detailed results for each GMU
        l_ply(seq(1, nrow(gmus.state)),
              .progress = myprogress("generating detailed results for each GMU"),
              function(i)
          {
              gmuname <- as.character(gmus.state@data[i, "GMU_NAME"])
              gmuno <- as.character(gmus.state@data[i, "GMU_NO"])
              message(gmuno, " ", gmuname)
              gmunameok <- make.names(substr(gmuname, 1, 15))
              nameok <- paste(sprintf("%s/%s", ident, ident), gmuno, gmunameok, sep = "_")
              ##pdf(paste(nameok, ".pdf", sep = ""), paper = "a4r", width = 18, height = 10)
              png(paste(nameok, "%01d.png", sep = ""), width = 3000, height = 1500, res = 300)
              anyOK <- FALSE
              for (refyear in refyears) {
                  pt.deltas.ref <- pt.deltas[,c(toString(refyear), "stationID", "longName")]
                  colnames(pt.deltas.ref@data)[1] <- "value"
                  foo <-
                      tryCatch(gmuLevelPlotGAM(gmus.state[,1], pt.deltas.ref, subset = i,
                                               refyear = refyear, cities = cities.all),
                               error = force)
                  if (inherits(foo, "error")) {
                      message(foo$message)
                      next
                  } else {
                      if (!anyOK) ## only once:
                          print(foo$context)
                      anyOK <- TRUE
                  }
                  print(foo$levels +
                        layer(sp.lines(rivers.state, lwd = 2, col = "blue", alpha = 0.5),
                              data = environment())
                        )
              }
              if (anyOK) {
                  foo <-
                      tryCatch(gmuHorizonplot(gmus.state[,1], pt.deltas, subset = i,
                                              levelzoo = infolist$levels),
                               error = force)
                  if (inherits(foo, "error")) {
                      message(foo$message)
                  } else {
                      print(foo)
                      print(gmuDistributionTrend(gmus.state[,1], pt.deltas, subset = i,
                                                 deltas = infolist$deltas))
                  }
              }
              dev.off()
          })
    }
}
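## Usage sketch: 'nswinfo' is produced by gw_timeseries_process() in "0_nsw.R"
## below; an equivalent object for another state (e.g. a hypothetical 'vicinfo')
## would be run the same way.
# do_groundwater_analysis(nswinfo, state = "NSW")
# do_groundwater_analysis(vicinfo, state = "VIC", do.what = "state")  ## GMU maps only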
## ---- next gist file (presumably "02_timeseries.R") ----
## merge a list of zoo objects (and put all on the same time scale).
## return the result as a merged zoo matrix, indexed by seq(start, end, by = by).
## This is similar to do.call("merge", tslist) but that can fail due to memory usage.
merge_zoolist <- function(tslist, ..., by = 1L)
{
    start <- do.call(min, lapply(tslist, start))
    end <- do.call(max, lapply(tslist, end))
    newindex <- seq(start, end, by = by)
    ## put each series on the same index and extract core data
    coreagg <- llply(tslist, function(z) {
        if (length(z) == 0)
            z <- zoo(1) ## need this, otherwise merged series has 0 columns
        coredata(merge(z, zoo(, newindex), all = c(FALSE, TRUE)))
    }, ...)
    ans <- matrix(unlist(coreagg, use.names = FALSE),
                  ncol = length(tslist), nrow = length(newindex))
    colnames(ans) <- names(tslist)
    zoo(ans, newindex)
}
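## A small self-contained example (toy data) of what merge_zoolist() returns:
## two annual zoo series on different ranges become one 3 x 2 zoo matrix on the
## union index 2000:2002, with NA where a series has no value.
# z1 <- zoo(c(1, 2, 3), 2000:2002)
# z2 <- zoo(c(10, 20), 2001:2002)
# merge_zoolist(list(a = z1, b = z2))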
gw_timeseries_process <- function(tslist, now)
{
    ans <- list()
    message("Processing time series...")
    ntasks <- 4
    task.i <- 0
    ## aggregate to regular monthly (median) series
    task.i <- task.i + 1
    message("Task ", task.i, " out of ", ntasks)
    ans$monthly <-
        llply(tslist,
              .progress = myprogress("Aggregating (by medians) to regular monthly series"),
              function(z)
          {
              z <- aggregate(z, as.yearmon,
                             function(x) median(coredata(x), na.rm = TRUE))
              t <- as.yearmon(seq(as.Date(start(z)), as.Date(end(z)), by = "months"))
              merge(z, zoo(,t), all = c(FALSE, TRUE))
          })
    ## calculate number of (monthly) observations in each calendar year for each site
    task.i <- task.i + 1
    message("Task ", task.i, " out of ", ntasks)
    nperyear <- llply(ans$monthly,
                      .progress = myprogress("Counting number of observations each year"),
                      function(z)
                  {
                      aggregate(z, as.integer, function(z) sum(complete.cases(z)))
                  })
    nperyear <-
        merge_zoolist(nperyear, .progress = myprogress("Merging observations per year to matrix"))
    coredata(nperyear)[coredata(is.na(nperyear))] <- 0
    ans$nperyear <- nperyear
    ## calculate seasonal fluctuations
    task.i <- task.i + 1
    message("Task ", task.i, " out of ", ntasks)
    ans$fluct <-
        unlist(llply(ans$monthly,
                     .progress = myprogress("Calculating seasonal fluctuations"),
                     function(z)
                 {
                     if (sum(!is.na(z)) < 10) return(NA)
                     ## first, remove a linear trend
                     # y <- coredata(z)
                     # x <- as.numeric(index(z))
                     # coredata(z) <- residuals(MASS::rlm(y ~ x, na.action = na.exclude))
                     median(aggregate(z, floor, function(z) {
                         tmp <- diff(range(z))
                         if (identical(tmp, 0)) NA else tmp
                     }), na.rm = TRUE)
                 }))
    ## calculate annual values (median of monthly values).
    ## BUT for years with fewer than 3 monthly values,
    ## substitute a rolling median over a window of (up to) 5 data-years
    ## -- to avoid influence of errors in single data points
    task.i <- task.i + 1
    message("Task ", task.i, " out of ", ntasks)
    ans$annual <-
        llply(ans$monthly,
              .progress = myprogress("Aggregating (by medians) to annual values; rolling median"),
              function(z)
          {
              ## 1st step: calculate medians for each year
              ## Note floor() applied to 'yearmon' gives the year (for aggregation)
              zann <-
                  aggregate(z, floor, function(x) {
                      median(coredata(x), na.rm = TRUE)
                  })
              znum <-
                  aggregate(z, floor, function(x) {
                      sum(complete.cases(x))
                  })
              ## 2nd step: rolling median over chunks of 5 data-years
              tmp <- na.omit(zann)
              width.odd <- length(tmp) - (1 - length(tmp) %% 2)
              zrollmed <-
                  rollapply(tmp, width = min(5, width.odd),
                            median, na.pad = TRUE)
              zrollmed <- merge(zrollmed, zoo(, time(zann)))
              ## 3rd step: use the rolling medians to fill in
              ## years which have fewer than 3 monthly observations.
              zann[znum < 3] <- zrollmed[znum < 3]
              ## locf = last observation carried forward
              na.locf(na.trim(zann, sides = "left"))
          })
    ans$levels <- merge_zoolist(ans$annual,
                                .progress = myprogress("Merging annual levels to matrix"))
    index(ans$levels) <- as.integer(index(ans$levels))
    ## calculate time series of changes from the "now" year:
    ## create a matrix of the same size as $levels with just the "now" data
    levs.nowmatrix <- na.locf(merge(ans$levels[I(now),], zoo(, time(ans$levels))),
                              fromLast = TRUE, na.rm = FALSE)
    ans$deltas <- window(ans$levels - levs.nowmatrix, end = now)
    ans
}
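## Toy illustration (made-up numbers) of the 'deltas' step above: each series
## is expressed as a change relative to its value in the 'now' year.
# levs <- zoo(cbind(a = c(10, 12, 11), b = c(5, 4, 6)), 2005:2007)
# nowmat <- na.locf(merge(levs[I(2007), ], zoo(, time(levs))),
#                   fromLast = TRUE, na.rm = FALSE)
# window(levs - nowmat, end = 2007)   ## a: -1  1  0    b: -1  -2  0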
## ---- next gist file (presumably "03_plot_all_sites.R") ----
plot_all_timeseries <- function(tslist, ..., ident,
                                nperpage = 20, y.same = NA)
{
    label <- toString(deparse(substitute(tslist)), width = 20)
    ## generate pdf with all time series
    pdf(paste(ident, "_timeseries.pdf", sep = ""), paper = "a4r", width = 10)
    l_ply(1:ceiling(length(tslist)/nperpage),
          .progress = myprogress(paste("Plotting", label, "time series to PDF")),
          function(i)
      {
          ii <- (i-1)*nperpage + 1:nperpage
          ii <- ii[ii <= length(tslist)]
          foo <-
              xyplot(tslist[ii], ..., y.same = y.same)
          try(print(foo))
      })
    dev.off()
}

plot_all_timecoverage <- function(tslist, ..., ident, nperpage = 50)
{
    label <- toString(deparse(substitute(tslist)), width = 20)
    ## generate pdf with all time series coverage
    pdf(paste(ident, "_timecoverage.pdf", sep = ""), paper = "a4r", width = 10)
    l_ply(1:ceiling(length(tslist)/nperpage),
          .progress = myprogress(paste("Plotting", label, "time coverage to PDF")),
          function(i)
      {
          ii <- (i-1)*nperpage + 1:nperpage
          ii <- ii[ii <= length(tslist)]
          foo <-
              xyplot(unname(tslist[ii]), layout = c(1,NA),
                     strip = FALSE, strip.left = FALSE,
                     ylab.right = list(names(tslist)[ii], cex = 0.4, rot = 0),
                     panel = panel.xblocks,
                     xlab = NULL,
                     xscale.components = xscale.components.subticks,
                     x.same = TRUE,
                     scales = list(y = list(draw = FALSE),
                                   x = list(axs = "i", tick.number = 8)))
          foo <- update(foo, ...)
          try(print(foo))
      })
    dev.off()
}
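## Usage sketch: extra lattice arguments pass through '...' (via xyplot() or
## update() above); e.g. to restrict the x range, as in "0_nsw.R" below:
# plot_all_timecoverage(nswinfo$monthly, ident = "nsw", xlim = c(1970, 2010))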
## ---- next gist file (presumably "04_plot_distributions.R") ----
plot_distributions <-
    function(tsinfo, ident, now, include.lines = FALSE)
{
    ans <- list()
    ## use all years with at least one observation
    tsok <- (tsinfo$nperyear >= 1)
    ## plot distribution of available annual values over sites
    ts.ann.n <- sapply(tsinfo$annual, function(x) sum(!is.na(coredata(x))))
    ans$npersite <-
        ecdfplot(ts.ann.n, grid = TRUE,
                 xscale.components = xscale.components.log10ticks,
                 main = "Distribution of number of annual values",
                 ylab = "Empirical CDF (cumulative fraction of sites)",
                 xlab = "number of years available at each site")
    ## plot number of sites with at least 1 monthly value, each year
    ans$sitesperyear <-
        xyplot(zoo(apply(tsok, 1, sum), index(tsok)),
               panel = panel.barchart, horizontal = FALSE, origin = 0,
               ylab = "number of sites with usable annual estimates", xlab = NULL,
               xscale.components = xscale.components.subticks) +
        layer_(panel.grid(h = -1, v = 0))
    ## how many sites have data in BOTH a 1-YEAR reference period and the most recent 1-YEAR period?
    ## (for several possible reference periods)
    isok.1year.now <- tsok[I(now),]
    isok.1year.each <- tsok
    isok.1year.each[,!isok.1year.now] <- FALSE
    ans$oksitesperyear <-
        barchart(apply(isok.1year.each, 1, sum) ~ format(time(isok.1year.each)),
                 ylab = sprintf("number of sites with 1 year data\nin BOTH reference and current (%i)", now),
                 origin = 0, scales = list(x = list(rot = 90))) +
        layer_(panel.grid(h = -1, v = 0))
    ## how many sites have data in BOTH a 2-YEAR reference period and the most recent 2-YEAR period?
    ## (for several possible reference periods)
    isok.2year.now <- aggregate(tsok[I(now-(1:0)),], rep(now-1, 2), all)
    isok.2year.each <- rollapply(tsok, width = 2, all, align = "right")
    isok.2year.each[,!isok.2year.now] <- FALSE
    ans$oksitesper2years <-
        barchart(apply(isok.2year.each, 1, sum) ~ format(time(isok.2year.each)),
                 ylab = sprintf("number of sites with 2 years data\nin BOTH reference and current (%s)",
                                paste(now-(1:0), collapse = "-")),
                 origin = 0, scales = list(x = list(rot = 90))) +
        layer_(panel.grid(h = -1, v = 0))
    ## plot distribution of changes over time
    ans$deltas.box <-
        plot_distribution_trend(tsinfo$deltas)
    if (include.lines) {
        ans$deltas.lines <-
            plot_distribution_trend(tsinfo$deltas, with.lines = TRUE)
    }
    ans
}

plot_distribution_trend <-
    function(deltas, ## e.g. vicinfo$deltas
             ...,
             with.lines = FALSE)
{
    ## plot distribution of changes over time
    if (with.lines == FALSE) {
        ydat <- stack(as.data.frame(t(deltas)))
        ## reverse time (to show "years ago")
        yearlabels <- time(deltas)
        ydat$ind <- factor(ydat$ind, levels = rev(time(deltas)), labels = rev(yearlabels))
        ans <-
            bwplot(values ~ ind, ydat, ylim = c(-20,20),
                   pch = "|", varwidth = TRUE, par.settings = simpleTheme(cex = 0.7),
                   ylab = sprintf("change in water level from current (%s)", end(deltas)),
                   xlab = "reference year",
                   scales = list(x = list(rot = 90), y = list(tick.number = 9)))
    } else {
        ## show individual sites as lines
        ans <-
            xyplot(deltas, screens = 1,
                   xlim = rev(range(time(deltas))), alpha = 0.2,
                   ylab = sprintf("change in water level from current (%s)", end(deltas)),
                   xlab = "reference year")
    }
    ans <- ans +
        layer_(panel.grid(-1,-1, x = x, y = y), panel.abline(h=0)) +
        layer(panel.points(1, 0, pch = 16, col = "black"),
              panel.arrows(1, 2.5, 1, 0, length = 0.1),
              panel.text(1, 3, '"current" (comparison point)',
                         srt = 90, pos = 4, offset = 0, cex = 0.7))
    ans
}
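## Usage sketch (these are the calls made from do_groundwater_analysis();
## 'nswinfo' is built in "0_nsw.R" below):
# print(plot_distributions(nswinfo, ident = "nsw", now = 2007))
# plot_distribution_trend(nswinfo$deltas, with.lines = TRUE)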
## ---- next gist file (presumably "05_plot_spatial.R") ----
## aggregate points (sites) into groundwater management units.
## NOTE: cannot use overlay() directly because it does not account for overlapping polygons!
## So loop through and apply overlay() to each "Polygons" element:
overlayEach <- function(spatialpoints, spatialpolys, ..., out.n = 1, fill = NA_real_)
{
    foo <-
        ldply(spatialpolys@polygons,
              .progress = myprogress("Spatial aggregation"),
              function(p, ...) {
                  ans <- unlist(overlay(spatialpoints, SpatialPolygons(list(p)), ...))
                  if (length(ans) == 0) rep(fill, length = out.n) else ans
              }, ...)
    rownames(foo) <- rownames(spatialpolys@data)
    foo
}
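## Usage sketch, mirroring the call in do_groundwater_analysis(); assumes
## objects like those built there exist, and that pt.deltas has a column
## named "1990" (a reference-year delta):
# overlayEach(pt.deltas[, "1990"], gmus.state,
#             fn = function(x) median(x[, 1], na.rm = TRUE), out.n = 1)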
## plot a SpatialPolygonsDataFrame and associated SpatialPointsDataFrame
## or a given subset (typically only 1) of the polygons
## (GMU = Groundwater Management Unit)
gmuLevelPlot <-
    function(polydf, pointsdf, subset = NULL, ...,
             colrange = c(-15, 15),
             xlim = extendrange(bbox(subpolydf)[1, ], f = 0.04),
             ylim = extendrange(bbox(subpolydf)[2, ], f = 0.04),
             main = paste("Change in level (m) since", refyear), refyear,
             xlab.top = toString(rownames(polydf@data)[subset]),
             cities = cities.ok,
             par.settings = custom.theme.2())
{
    ## limit range of data, for plotting
    subcolrange <- extendrange(colrange, f = -0.01)
    polydf@data[] <- lapply(polydf@data, function(x)
                            pmin(pmax(x, min(subcolrange)), max(subcolrange)))
    colnames(pointsdf@data)[1] <- "value"
    pointsdf@data$value <-
        pmin(pmax(pointsdf@data$value, min(subcolrange)), max(subcolrange))
    subpolydf <- polydf
    subpointsdf <- pointsdf
    if (!is.null(subset)) {
        subpolydf <- polydf[subset,]
        inside <- !is.na(overlay(pointsdf, subpolydf))
        subpointsdf <- pointsdf[inside,]
    }
    result <- list()
    if (!is.null(subset)) {
        result$context <-
            spplot(polydf[,1], main = main, colorkey = FALSE,
                   col = "grey", col.regions = NA) +
            layer(panel.rect(min(xlim), min(ylim), max(xlim), max(ylim),
                             col = "yellow", alpha = 0.5),
                  sp.polygons(subpolydf),
                  data = environment())
    }
    result$levels <-
        spplot(subpolydf, ...,
               at = seq(min(colrange), max(colrange), by = 1),
               xlim = xlim, ylim = ylim,
               lty = 0, as.table = TRUE, par.settings = par.settings,
               main = main, xlab.top = xlab.top) +
        layer_(panel.fill("grey90"), panel.grid(100,100, col = "white")) +
        layer(panel.levelplot.points(x = Lng, y = Lat, z = value,
                                     subscripts = TRUE, ..., cex = 0.5,
                                     col.symbol = "transparent"),
              ## show translucent circles for all points (crosses for NAs)
              grid.points(x = Lng, y = Lat,
                          pch = ifelse(is.na(value), 4, 1),
                          gp = gpar(cex = 1, alpha = 0.1, lwd = 0.1)),
              data = as.data.frame(subpointsdf),
              packets = 1) +
        layer(panel.points(long, lat, pch = 15, cex = 0.6, col = "black"),
              panel.text(long, lat, name, pos = 1, cex = 0.6), data = cities) +
        layer(sp.polygons(subpolydf, lwd = 0.5, alpha = 0.5), data = environment())
    result
}

## take one polygon ('subset') from the SpatialPolygonsDataFrame
## (GMU = Groundwater Management Unit)
## with associated points from the given SpatialPointsDataFrame
## and generate smoothed surface over the points using gam().
## also returns a context plot of the 'subset' polygon.
gmuLevelPlotGAM <-
    function(polydf, pointsdf, subset = NULL, ...,
             minimum.sites = 20,
             colrange = c(-15, 15),
             xlim = extendrange(bbox(subpolydf)[1, ], f = 0.04),
             ylim = extendrange(bbox(subpolydf)[2, ], f = 0.04),
             xlab.top = paste("Change in level (m) since", refyear), refyear,
             main = toString(rownames(polydf@data)[subset]),
             cities = cities.ok,
             par.settings = custom.theme.2())
{
    stopifnot("value" %in% colnames(pointsdf@data))
    subpolydf <- polydf
    subpointsdf <- pointsdf
    if (!is.null(subset)) {
        subpolydf <- polydf[subset,]
        inside <- !is.na(overlay(pointsdf, subpolydf))
        subpointsdf <- pointsdf[inside,]
    }
    sitesok <- !is.na(subpointsdf@data$value)
    if (length(sitesok) < minimum.sites) {
        stop("number of sites less than ", minimum.sites)
    }
    if (sum(sitesok) == 0) {
        stop("no active sites (no non-missing values)")
    }
    ## limit range of data, for plotting
    subcolrange <- extendrange(colrange, f = -0.01)
    subpolydf@data[] <- lapply(subpolydf@data, function(x)
                               pmin(pmax(x, min(subcolrange)), max(subcolrange)))
    subpointsdf@data$value <-
        pmin(pmax(subpointsdf@data$value, min(subcolrange)), max(subcolrange))
    result <- list()
    if (!is.null(subset)) {
        result$context <-
            spplot(polydf[,1], main = main, colorkey = FALSE,
                   col = "grey", col.regions = NA) +
            layer(panel.rect(min(xlim), min(ylim), max(xlim), max(ylim),
                             col = "yellow", alpha = 0.5),
                  sp.polygons(subpolydf),
                  data = environment())
    }
    ## fit a GAM and generate predictions on a regular grid inside the polygon
    samp <- spsample(subpolydf, n = 200 * 200, type = "regular")
    library(mgcv)
    mod <- try(gam(value ~ s(Lng,Lat), data = as.data.frame(subpointsdf)),
               silent = TRUE)
    if (inherits(mod, "try-error")) {
        ## can't fit a GAM, so just fill with a constant
        pred <- rep(median(subpointsdf@data$value, na.rm = TRUE), nrow(as.data.frame(samp)))
    } else {
        pred <- as.vector(predict(mod, with(as.data.frame(samp), list(Lng = x1, Lat = x2))))
    }
    result$levels <-
        spplot(subpolydf, ...,
               at = seq(min(colrange), max(colrange), by = 1),
               xlim = xlim, ylim = ylim,
               par.settings = par.settings,
               panel = function(...) NULL,
               main = main, xlab.top = xlab.top) +
        layer_(panel.fill("grey90"), panel.grid(100, 100, col = "white")) +
        layer(panel.levelplot(x = x1, y = x2, z = pred, subscripts = TRUE, ...),
              data = c(list(pred = pred), as.data.frame(samp))) +
        layer(panel.levelplot.points(x = Lng, y = Lat, z = value,
                                     subscripts = TRUE, ..., cex = 1,
                                     col.symbol = "transparent"),
              ## show translucent circles for all points (crosses for NAs)
              grid.points(x = Lng, y = Lat,
                          pch = ifelse(is.na(value), 4, 1),
                          gp = gpar(cex = 1, alpha = 0.3, lwd = 0.2)),
              data = as.data.frame(subpointsdf),
              packets = 1) +
        layer(sp.polygons(subpolydf, lwd = 0.5, alpha = 0.5), data = environment())
    result
}

## horizonplot of time series (levelzoo)
## using all sites within the given polydf[subset,]
gmuHorizonplot <-
    function(polydf, pointsdf, subset = NULL, ...,
             levelzoo, ## e.g. vicinfo$levels
             horizonscale = 5,
             origin = function(y) mean(range(y, na.rm = TRUE)))
{
    subpolydf <- polydf
    subpointsdf <- pointsdf
    if (!is.null(subset)) {
        subpolydf <- polydf[subset,]
        inside <- !is.na(overlay(pointsdf, subpolydf))
        subpointsdf <- pointsdf[inside,]
    }
    if (nrow(subpointsdf) == 0)
        stop("no values")
    stopifnot(c("stationID", "longName") %in% colnames(pointsdf@data))
    siteids <- as.character(subpointsdf@data$stationID)
    sitenames <- as.character(subpointsdf@data$longName)
    sitenames <- substr(sitenames, 1, 18)
    ord <- order(siteids)
    siteids <- siteids[ord]
    sitenames <- sitenames[ord]
    result <-
        horizonplot(0 - levelzoo[,siteids], strip.left = FALSE,
                    horizonscale = horizonscale,
                    origin = origin,
                    colorkey = list(space = "left"), layout = c(1, NA),
                    scales = list(x = list(axs = "i")), xlab = NULL,
                    par.settings = list(axis.line = list(col = "gray50")),
                    ylab = list(rev(siteids), rot = 0, cex = 0.3),
                    ylab.right = list(sitenames, rot = 0, cex = 0.3))
    update(result, ...)
}

## boxplots of distributions of changes over time (deltas)
## using all sites within the given polydf[subset,]
gmuDistributionTrend <-
    function(polydf, pointsdf, subset = NULL, ...,
             deltas) ## e.g. vicinfo$deltas
{
    subpolydf <- polydf
    subpointsdf <- pointsdf
    if (!is.null(subset)) {
        subpolydf <- polydf[subset,]
        inside <- !is.na(overlay(pointsdf, subpolydf))
        subpointsdf <- pointsdf[inside,]
    }
    if (nrow(subpointsdf) == 0)
        stop("no values")
    stopifnot(c("stationID", "longName") %in% colnames(pointsdf@data))
    siteids <- as.character(subpointsdf@data$stationID)
    sitenames <- as.character(subpointsdf@data$longName)
    sitenames <- substr(sitenames, 1, 18)
    ord <- order(siteids)
    siteids <- siteids[ord]
    sitenames <- sitenames[ord]
    plot_distribution_trend(deltas[,siteids], ...)
}
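## Usage sketch, mirroring do_groundwater_analysis(): plot the GAM surface of
## reference-year deltas for the first GMU polygon of a state. Assumes the
## objects built there exist ('pt.deltas.ref' must have a "value" column).
# foo <- gmuLevelPlotGAM(gmus.state[,1], pt.deltas.ref, subset = 1,
#                        refyear = 1990, cities = cities.all)
# print(foo$context); print(foo$levels)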
## ---- next gist file ("0_nsw.R", as referenced above) ----
## process the data for NSW
## TODO: reading in the whole table may not be feasible on some machines.
## It would probably be better to read in data for each site using SQL
## (see the per-site query sketch below).
nsw <- sqlFetch(conn, "NSW")

## convert to zoo and remove obvious bad values
nswlist <-
    llply(split(1:nrow(nsw), nsw$stationID),
          .progress = myprogress("Splitting by site and converting to zoo objects"),
          function(ii)
      {
          x <- nsw[ii,]
          ## remove zero values (from visual inspection this is necessary):
          x <- subset(x, StandingWaterLevel != 0.0)
          ## TODO: negative values?
          z <- suppressWarnings(zoo(x$StandingWaterLevel, x$observTime))
      })
rm(nsw)
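## Hedged sketch of the per-site alternative flagged in the TODO above: query
## one station at a time instead of fetching the whole "NSW" table. Table and
## column names are taken from the code above; the exact SQL accepted depends
## on the Access database, so treat this as an untested outline.
# get_site <- function(id) {
#     x <- sqlQuery(conn, paste0("SELECT observTime, StandingWaterLevel FROM NSW",
#                                " WHERE stationID = '", id, "'"))
#     x <- subset(x, StandingWaterLevel != 0.0)
#     suppressWarnings(zoo(x$StandingWaterLevel, x$observTime))
# }
# ids <- sqlQuery(conn, "SELECT DISTINCT stationID FROM NSW")[[1]]
# nswlist <- llply(as.character(ids), get_site, .progress = myprogress("Per-site queries"))
# names(nswlist) <- as.character(ids)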
## remove series without any observations
nswlist <- nswlist[sapply(nswlist, function(x) sum(!is.na(coredata(x)))) > 0]
## remove all discontinued sites (those with no data in 2006 or later)
nswlist <- nswlist[sapply(nswlist, end) >= as.POSIXct("2006-01-01")]

## distribution of start times
as.yearmon(quantile(do.call("c", lapply(nswlist, start))))
## distribution of end times
as.yearmon(quantile(do.call("c", lapply(nswlist, end))))

nswinfo <- gw_timeseries_process(nswlist, now = 2007)
save(nswinfo, file = "nswinfo.Rdata")
#load("nswinfo.Rdata")

## summary of changes each year (from current)
#summary(t(nswinfo$deltas))

## plot up some samples of the monthly + annual series, interactively
devAskNewPage(TRUE)
#pdf("nsw_samples.pdf", paper = "a4r", width = 18, height = 10)
for (i in head(seq(0, length(nswinfo$monthly), length = 10), -1)) {
    print(update(xyplot(nswinfo$monthly[1:20 + i], type = "b", cex = 0.7),
                 scales = list(y = "sliced", x = "same")) +
          xyplot(nswinfo$annual[1:20 + i], type = "s", col = "red", lwd = 2))
}
#dev.off()
devAskNewPage(FALSE)

plot_all_timeseries(nswinfo$monthly, ident = "nsw_monthly",
                    x.same = TRUE)
plot_all_timeseries(nswinfo$annual, ident = "nsw_annual", type = "s",
                    xlim = extendrange(c(1970,2010)))
## slow!
plot_all_timecoverage(nswinfo$monthly, ident = "nsw",
                      xlim = c(1970,2010))

## the real stuff:
do_groundwater_analysis(nswinfo, state = "NSW")