Skip to content

Instantly share code, notes, and snippets.

@floybix
Created August 11, 2010 07:13
Show Gist options
  • Save floybix/518612 to your computer and use it in GitHub Desktop.
Save floybix/518612 to your computer and use it in GitHub Desktop.
groundwater database analysis scripts
## groundwater database analysis scripts
## Felix Andrews <[email protected]> 2010-08-01
library(RODBC)
library(zoo)
library(lattice)
library(latticeExtra)
library(sp)
library(rgdal)
library(mgcv)
library(plyr) ## for progress bars
library(maps) ## for world.cities
stopifnot(packageDescription("zoo")$Version >= "1.7")
stopifnot(packageDescription("lattice")$Version >= "0.19")
stopifnot(packageDescription("latticeExtra")$Version >= "0.6-14")
lattice.options(default.theme = custom.theme.2())
## use Windows progress bar. otherwise change this to progress_text or progress_tk
myprogress <- function(title = "Progress") progress_win(title)
setwd("X:/Projects/BOMGW")
conn <- odbcConnectAccess("MdbCsv/water.mdb")
sqlTables(conn)
locs <- sqlFetch(conn, "Locations")
locs$DrilledDpth <- as.numeric(gsub(",", "", locs$DrilledDpth))
gmus <- readOGR("agmu_r9nnd_00311a01e_sgeo___", "gmushape")
rownames(gmus@data) <- make.unique(as.character(gmus@data$GMU_NAME))
#rivers <- readOGR("river_reaches/arc_reach_shape", "arc_reach")
#rivers <- rivers[,c("STATE", "BASIN", "A", "M")]
#save(rivers, file = "rivers.Rdata")
load("rivers.Rdata")
data(world.cities)
cities.all <- subset(world.cities, country.etc == "Australia")
cities.ok <- subset(world.cities, country.etc == "Australia" & (pop >= 50000) |
(name %in% c("Darwin", "Alice Springs", "Kalgoolie", "Broome", "Walgett",
"Mount Isa", "Esperance", "Broken Hill", "Coober Pedy",
"Bendigo", "Ballarat", "Sale", "Mildura"))) # "Echuca-Moama"
source("01_script.R")
source("02_timeseries.R")
source("03_plot_all_sites.R")
source("04_plot_distributions.R")
source("05_plot_spatial.R")
## now you are ready to run "0_nsw.R" etc
## groundwater database analysis scripts
## Felix Andrews <[email protected]> 2010-08-01
do_groundwater_analysis <-
function(infolist, state = "VIC", ident = state,
refyears = c(1985, 1990, 1995), now = 2007,
gmus.state = gmus[gmus$STATE == state,],
rivers.state = rivers[rivers$STATE == state,],
do.what = c("state", "gmus"))
{
if (!file.exists(ident)) dir.create(ident)
message("generating distribution plots")
png(sprintf("%s/%s_distributions_%%01d.png", ident, ident),
width = 3000, height = 1500, res = 300)
print(plot_distributions(infolist, ident = ident, now = now))
dev.off()
## write out differences from current as shapefile (points)
message("writing points to shapefile")
deltas <- as.data.frame(t(infolist$deltas))
deltas$stationID <- rownames(deltas)
pt.deltas <- merge(locs, deltas)
pt.deltas <- subset(pt.deltas, Lng > 120)
## need to keep column names to at most 10 characters for writeOGR
colnames(pt.deltas) <- substring(colnames(pt.deltas), first = 1, last = 10)
pt.deltas <- SpatialPointsDataFrame(coords = pt.deltas[,c("Lng","Lat")],
data = pt.deltas)
writeOGR(pt.deltas,
sprintf("%s/%s_gw_trends", ident, ident),
"sites_deltas",
driver = "ESRI Shapefile")
if ("state" %in% do.what) {
for (refyear in refyears) {
message("aggregating to GMUs for reference year ", refyear)
## calculate quantiles of changes over the set of bores in each GMU
qqtmp <-
overlayEach(pt.deltas[,toString(refyear)], gmus.state,
fn = function(x) {
qq <- quantile(x[,1], c(0.5, 0.05, 0.01), na.rm = TRUE)
names(qq) <- c("median", "worst_5pc", "worst_1pc") ## max 10 chars!
qq[["n_values"]] <- sum(!is.na(x[,1]))
qq
},
out.n = 4)
ii.na <- is.na(qqtmp[,"n_values"])
qqtmp[ii.na, "n_values"] <- 0
## write out aggregated stats as shapefile (polygons)
gmus.ref <- gmus.state
gmus.ref@data <- cbind(qqtmp, gmus.ref@data)
writeOGR(gmus.ref,
sprintf("%s/%s_gw_trends", ident, ident),
sprintf("gmus_%i", refyear),
driver = "ESRI Shapefile")
pt.deltas.ref <- pt.deltas[,c(refyear, "stationID", "longName")]
colnames(pt.deltas.ref@data)[1] <- "value"
## generate maps (quantiles of changes in GMUs)
foo <-
gmuLevelPlot(gmus.ref[,1:4], pt.deltas.ref, refyear = refyear,
xlab.top = ident)
##pdf(sprintf("%s/%s_deltas%i_gmus.pdf", ident, ident, refyear),
## paper = "a4r", width = 18, height = 10)
##print(foo)
##dev.off()
png(sprintf("%s/%s_deltas%i_gmus.png", ident, ident, refyear),
width = 6000, height = 3000, res = 300)
print(foo)
dev.off()
}
}
if ("gmus" %in% do.what) {
## generate detailed results for each GMU
l_ply(seq(1, nrow(gmus.state)),
.progress = myprogress("generating detailed results for each GMU"),
function (i)
{
gmuname <- as.character(gmus.state@data[i, "GMU_NAME"])
gmuno <- as.character(gmus.state@data[i, "GMU_NO"])
message(gmuno, gmuname)
gmunameok <- make.names(substr(gmuname, 1, 15))
nameok <- paste(sprintf("%s/%s", ident, ident), gmuno, gmunameok, sep = "_")
##pdf(paste(nameok, ".pdf", sep = ""), paper = "a4r", width = 18, height = 10)
png(paste(nameok, "%01d.png", sep = ""), width = 3000, height = 1500, res = 300)
anyOK <- FALSE
for (refyear in refyears) {
pt.deltas.ref <- pt.deltas[,c(toString(refyear), "stationID", "longName")]
colnames(pt.deltas.ref@data)[1] <- "value"
foo <-
tryCatch(gmuLevelPlotGAM(gmus.state[,1], pt.deltas.ref, subset = i,
refyear = refyear, cities = cities.all),
error = force)
if (inherits(foo, "error")) {
message(foo$message)
next
} else {
if (anyOK == FALSE) ## only once:
print(foo$context)
anyOK <- TRUE
}
print(foo$levels +
layer(sp.lines(rivers.state, lwd = 2, col = "blue", alpha = 0.5),
data = environment())
)
}
if (anyOK) {
foo <-
tryCatch(gmuHorizonplot(gmus.state[,1], pt.deltas, subset = i,
levelzoo = infolist$levels),
error = force)
if (inherits(foo, "error")) {
message(foo$message)
} else {
print(foo)
print(gmuDistributionTrend(gmus.state[,1], pt.deltas, subset = i,
deltas = infolist$deltas))
}
}
dev.off()
})
}
}
## merge a list of zoo objects (and put all on the same time scale).
## return the result as a merged zoo matrix, indexed by seq(start, end, by = by)
## This is similar to do.call("merge", tslist) but that can fail due to memory usage.
merge_zoolist <- function(tslist, ..., by = 1L)
{
start <- do.call(min, lapply(tslist, start))
end <- do.call(max, lapply(tslist, end))
newindex <- seq(start, end, by = by)
## put each series on the same index and extract core data
coreagg <- llply(tslist, function(z) {
if (length(z) == 0)
z <- zoo(1) ## need this, otherwise merged series has 0 columns
coredata(merge(z, zoo(, newindex), all = c(FALSE, TRUE)))
}, ...)
ans <- matrix(unlist(coreagg, use.names = FALSE),
ncol = length(tslist), nrow = length(newindex))
colnames(ans) <- names(tslist)
zoo(ans, newindex)
}
gw_timeseries_process <- function(tslist, now)
{
ans <- list()
message("Processing time series...")
ntasks <- 4
task.i <- 0
## aggregate to regular monthly (median) series
task.i <- task.i + 1
message("Task ", task.i, " out of ", ntasks)
ans$monthly <-
llply(tslist,
.progress = myprogress("Aggregating (by medians) to regular monthly series"),
function(z)
{
z <- aggregate(z, as.yearmon,
function(x) median(coredata(x), na.rm = TRUE))
t <- as.yearmon(seq(as.Date(start(z)), as.Date(end(z)), by = "months"))
merge(z, zoo(,t), all = c(FALSE, TRUE))
})
## calculate number of (monthly) observations in each calendar year for each site
task.i <- task.i + 1
message("Task ", task.i, " out of ", ntasks)
nperyear <- llply(ans$monthly,
.progress = myprogress("Counting number of observations each year"),
function(z)
{
aggregate(z, as.integer, function(z) sum(complete.cases(z)))
})
nperyear <-
merge_zoolist(nperyear, .progress = myprogress("Merging observations per year to matrix"))
coredata(nperyear)[coredata(is.na(nperyear))] <- 0
ans$nperyear <- nperyear
## calculate seasonal fluctuations
task.i <- task.i + 1
message("Task ", task.i, " out of ", ntasks)
ans$fluct <-
unlist(llply(ans$monthly,
.progress = myprogress("Calculating seasonal fluctuations"),
function(z)
{
if (sum(!is.na(z)) < 10) return(NA)
## first, remove a linear trend
# y <- coredata(z)
# x <- as.numeric(index(z))
# coredata(z) <- residuals(MASS::rlm(y ~ x, na.action = na.exclude))
median(aggregate(z, floor, function(z) {
tmp <- diff(range(z))
if (identical(tmp, 0)) NA else tmp
}), na.rm = TRUE)
}))
## calculate annual values (median of monthly values).
## BUT for years with only a single (monthly) value,
## replace with the median over at least 3 values in a 5-year window.
## -- to avoid influence of errors in single data points
task.i <- task.i + 1
message("Task ", task.i, " out of ", ntasks)
ans$annual <-
llply(ans$monthly,
.progress = myprogress("Aggregating (by medians) to annual values; rolling median"),
function(z)
{
## 1st step: calculate medians for each year
## Note floor() applied to 'yearmon' gives the year (for aggregation)
zann <-
aggregate(z, floor, function(x) {
median(coredata(x), na.rm = TRUE)
})
znum <-
aggregate(z, floor, function(x) {
sum(complete.cases(x))
})
## 2nd step: rolling median over chunks of 5 data-years
tmp <- na.omit(zann)
width.odd <- length(tmp) - (1 - length(tmp)%%2)
zrollmed <-
rollapply(tmp, width = min(5, width.odd),
median, na.pad = TRUE)
zrollmed <- merge(zrollmed, zoo(,time(zann)))
## 3rd step: use the rolling medians to fill in
## years which have fewer than 3 monthly observations.
zann[znum < 3] <- zrollmed[znum < 3]
## locf = last observation carry forward
na.locf(na.trim(zann, sides = "left"))
})
ans$levels <- merge_zoolist(ans$annual,
.progress = myprogress("Merging annual levels to matrix"))
index(ans$levels) <- as.integer(index(ans$levels))
## calculate time series of changes from the "now" year
## create a matrix of the same size as $levels with just the "now" data
levs.nowmatrix <- na.locf(merge(ans$levels[I(now),], zoo(,time(ans$levels))),
fromLast = TRUE, na.rm = FALSE)
ans$deltas <- window(ans$levels - levs.nowmatrix, end = now)
ans
}
plot_all_timeseries <- function(tslist, ..., ident,
nperpage = 20, y.same = NA)
{
label <- toString(deparse(substitute(tslist)), width = 20)
## generate pdf with all time series
pdf(paste(ident, "_timeseries.pdf", sep = ""), paper = "a4r", width = 10)
l_ply(1:ceiling(length(tslist)/nperpage),
.progress = myprogress(paste("Plotting", label, "time series to PDF")),
function(i)
{
ii <- (i-1)*nperpage + 1:nperpage
ii <- ii[ii <= length(tslist)]
foo <-
xyplot(tslist[ii], ..., y.same = y.same)
try(print(foo))
})
dev.off()
}
plot_all_timecoverage <- function(tslist, ..., ident, nperpage = 50)
{
label <- toString(deparse(substitute(tslist)), width = 20)
## generate pdf with all time series coverage
pdf(paste(ident, "_timecoverage.pdf", sep = ""), paper = "a4r", width = 10)
l_ply(1:ceiling(length(tslist)/nperpage),
.progress = myprogress(paste("Plotting", label, "time coverage to PDF")),
function(i)
{
ii <- (i-1)*nperpage + 1:nperpage
ii <- ii[ii <= length(tslist)]
foo <-
xyplot(unname(tslist[ii]), layout = c(1,NA),
strip = FALSE, strip.left = FALSE,
ylab.right = list(names(tslist)[ii], cex = 0.4, rot = 0),
panel = panel.xblocks,
xlab = NULL,
xscale.components = xscale.components.subticks,
x.same = TRUE,
scales = list(y = list(draw = FALSE),
x = list(axs = "i", tick.number = 8)))
foo <- update(foo, ...)
try(print(foo))
})
dev.off()
}
plot_distributions <-
function(tsinfo, ident, now, include.lines = FALSE)
{
ans <- list()
## use all years with at least one observation
tsok <- (tsinfo$nperyear >= 1)
## plot distribution of available annual values over sites
ts.ann.n <- sapply(tsinfo$annual, function(x) sum(!is.na(coredata(x))))
ans$npersite <-
ecdfplot(ts.ann.n, grid = TRUE,
xscale.components = xscale.components.log10ticks,
main = "Distribution of number of annual values",
ylab = "Empirical CDF (cumulative fraction of sites)",
xlab = "number of years available at each site")
## plot number of sites with at least 1 monthly value, each year
ans$sitesperyear <-
xyplot(zoo(apply(tsok, 1, sum), index(tsok)),
panel = panel.barchart, horizontal = FALSE, origin = 0,
ylab = "number of sites with usable annual estimates", xlab = NULL,
xscale.components = xscale.components.subticks) +
layer_(panel.grid(h = -1, v = 0))
## how many sites have data in BOTH a 1-YEAR reference period and the most recent 1-YEAR period?
## (for several possible reference periods)
isok.1year.now <- tsok[I(now),]
isok.1year.each <- tsok
isok.1year.each[,!isok.1year.now] <- FALSE
ans$oksitesperyear <-
barchart(apply(isok.1year.each, 1, sum) ~ format(time(isok.1year.each)),
ylab = sprintf("number of sites with 1 year data\nin BOTH reference and current (%i)", now),
origin = 0, scales = list(x = list(rot = 90))) +
layer_(panel.grid(h = -1, v = 0))
## how many sites have data in BOTH a 2-YEAR reference period and the most recent 2-YEAR period?
## (for several possible reference periods)
isok.2year.now <- aggregate(tsok[I(now-(1:0)),], rep(now-1, 2), all)
isok.2year.each <- rollapply(tsok, width = 2, all, align = "right")
isok.2year.each[,!isok.2year.now] <- FALSE
ans$oksitesper2years <-
barchart(apply(isok.2year.each, 1, sum) ~ format(time(isok.2year.each)),
ylab = sprintf("number of sites with 2 years data\nin BOTH reference and current (%s)",
paste(now-(1:0), collapse = "-")),
origin = 0, scales = list(x = list(rot = 90))) +
layer_(panel.grid(h = -1, v = 0))
## plot distribution of changes over time
ans$deltas.box <-
plot_distribution_trend(tsinfo$deltas)
if (include.lines) {
ans$deltas.lines <-
plot_distribution_trend(tsinfo$deltas, with.lines = TRUE)
}
ans
}
plot_distribution_trend <-
function(deltas, ## e.g. vicinfo$deltas
...,
with.lines = FALSE)
{
## plot distribution of changes over time
if (with.lines == FALSE) {
ydat <- stack(as.data.frame(t(deltas)))
## reverse time (to show "years ago")
yearlabels <- time(deltas)
ydat$ind <- factor(ydat$ind, levels = rev(time(deltas)), labels = rev(yearlabels))
ans <-
bwplot(values ~ ind, ydat, ylim = c(-20,20),
pch = "|", varwidth = TRUE, par.settings = simpleTheme(cex = 0.7),
ylab = sprintf("change in water level from current (%s)", end(deltas)),
xlab = "reference year",
scales = list(x = list(rot = 90), y = list(tick.number = 9)))
} else {
## show individual sites as lines
ans <-
xyplot(deltas, screens = 1,
xlim = rev(range(time(deltas))), alpha = 0.2,
ylab = sprintf("change in water level from current (%s)", end(deltas)),
xlab = "reference year")
}
ans <- ans +
layer_(panel.grid(-1,-1, x = x, y = y), panel.abline(h=0)) +
layer(panel.points(1, 0, pch = 16, col = "black"),
panel.arrows(1, 2.5, 1, 0, length = 0.1),
panel.text(1, 3, '"current" (comparison point)',
srt = 90, pos = 4, offset = 0, cex = 0.7))
ans
}
## aggregate points (sites) into groundwater management units.
## NOTE can not use overlay() directly because it does not account for overlapping polygons!
## So loop through and apply overlay() to each "Polygons" element:
overlayEach <- function(spatialpoints, spatialpolys, ..., out.n = 1, fill = NA_real_)
{
foo <-
ldply(spatialpolys@polygons,
.progress = myprogress("Spatial aggregation"),
function(p, ...) {
ans <- unlist(overlay(spatialpoints, SpatialPolygons(list(p)), ...))
if (length(ans) == 0) rep(fill, length = out.n) else ans
}, ...)
rownames(foo) <- rownames(spatialpolys@data)
foo
}
## plot a SpatialPolygonsDataFrame and associated SpatialPointsDataFrame
## or a given subset (typically only 1) of the polygons
## (GMU = Groundwater Management Unit)
gmuLevelPlot <-
function(polydf, pointsdf, subset = NULL, ...,
colrange = c(-15, 15),
xlim = extendrange(bbox(subpolydf)[1, ], f = 0.04),
ylim = extendrange(bbox(subpolydf)[2, ], f = 0.04),
main = paste("Change in level (m) since", refyear), refyear,
xlab.top = toString(rownames(polydf@data)[subset]),
cities = cities.ok,
par.settings = custom.theme.2())
{
## limit range of data, for plotting
subcolrange <- extendrange(colrange, f = -0.01)
polydf@data[] <- lapply(polydf@data, function(x)
pmin(pmax(x, min(subcolrange)), max(subcolrange)))
colnames(pointsdf@data)[1] <- "value"
pointsdf@data$value <-
pmin(pmax(pointsdf@data$value, min(subcolrange)), max(subcolrange))
subpolydf <- polydf
subpointsdf <- pointsdf
if (!is.null(subset)) {
subpolydf <- polydf[subset,]
inside <- !is.na(overlay(pointsdf, subpolydf))
subpointsdf <- pointsdf[inside,]
}
result <- list()
if (!is.null(subset)) {
result$context <-
spplot(polydf[,1], main = main, colorkey = FALSE,
col = "grey", col.regions = NA) +
layer(panel.rect(min(xlim), min(ylim), max(xlim), max(ylim),
col = "yellow", alpha = 0.5),
sp.polygons(subpolydf),
data = environment())
}
result$levels <-
spplot(subpolydf, ...,
at = seq(min(colrange), max(colrange), by = 1),
xlim = xlim, ylim = ylim,
lty = 0, as.table = TRUE, par.settings = par.settings,
main = main, xlab.top = xlab.top) +
layer_(panel.fill("grey90"), panel.grid(100,100, col = "white")) +
layer(panel.levelplot.points(x = Lng, y = Lat, z = value,
subscripts = TRUE, ..., cex = 0.5,
col.symbol = "transparent"),
## show translucent circles for all points (crosses for NAs)
grid.points(x = Lng, y = Lat,
pch = ifelse(is.na(value), 4, 1),
gp = gpar(cex = 1, alpha = 0.1, lwd = 0.1)),
data = as.data.frame(subpointsdf),
packets = 1) +
layer(panel.points(long, lat, pch = 15, cex = 0.6, col = "black"),
panel.text(long, lat, name, pos = 1, cex = 0.6), data = cities) +
layer(sp.polygons(subpolydf, lwd = 0.5, alpha = 0.5), data = environment())
result
}
## take one polygon ('subset') from the SpatialPolygonsDataFrame
## (GMU = Groundwater Management Unit)
## with associated points from the given SpatialPointsDataFrame
## and generate smoothed surface over the points using gam().
## also returns a context plot of the 'subset' polygon.
gmuLevelPlotGAM <-
function(polydf, pointsdf, subset = NULL, ...,
minimum.sites = 20,
colrange = c(-15, 15),
xlim = extendrange(bbox(subpolydf)[1, ], f = 0.04),
ylim = extendrange(bbox(subpolydf)[2, ], f = 0.04),
xlab.top = paste("Change in level (m) since", refyear), refyear,
main = toString(rownames(polydf@data)[subset]),
cities = cities.ok,
par.settings = custom.theme.2())
{
stopifnot("value" %in% colnames(pointsdf@data))
subpolydf <- polydf
subpointsdf <- pointsdf
if (!is.null(subset)) {
subpolydf <- polydf[subset,]
inside <- !is.na(overlay(pointsdf, subpolydf))
subpointsdf <- pointsdf[inside,]
}
sitesok <- !is.na(subpointsdf@data$value)
if (length(sitesok) < minimum.sites) {
stop("number of sites less than ", minimum.sites)
}
if (sum(sitesok) == 0) {
stop("no active sites (no non-missing values)")
}
## limit range of data, for plotting
subcolrange <- extendrange(colrange, f = -0.01)
subpolydf@data[] <- lapply(subpolydf@data, function(x)
pmin(pmax(x, min(subcolrange)), max(subcolrange)))
subpointsdf@data$value <-
pmin(pmax(subpointsdf@data$value, min(subcolrange)), max(subcolrange))
result <- list()
if (!is.null(subset)) {
result$context <-
spplot(polydf[,1], main = main, colorkey = FALSE,
col = "grey", col.regions = NA) +
layer(panel.rect(min(xlim), min(ylim), max(xlim), max(ylim),
col = "yellow", alpha = 0.5),
sp.polygons(subpolydf),
data = environment())
}
## fit a GAM and generate predictions on a regular grid inside the polygon
samp <- spsample(subpolydf, n = 200 * 200, type = "regular")
library(mgcv)
mod <- try(gam(value ~ s(Lng,Lat), data = as.data.frame(subpointsdf)),
silent = TRUE)
if (inherits(mod, "try-error")) {
## can't fit a GAM, so just fill with a constant
pred <- rep(median(subpointsdf@data$value, na.rm = TRUE), nrow(as.data.frame(samp)))
} else {
pred <- as.vector(predict(mod, with(as.data.frame(samp), list(Lng = x1, Lat = x2))))
}
result$levels <-
spplot(subpolydf, ...,
at = seq(min(colrange), max(colrange), by = 1),
xlim = xlim, ylim = ylim,
par.settings = par.settings,
panel = function(...) NULL,
main = main, xlab.top = xlab.top) +
layer_(panel.fill("grey90"), panel.grid(100, 100, col = "white")) +
layer(panel.levelplot(x = x1, y = x2, z = pred, subscripts = TRUE, ...),
data = c(list(pred = pred), as.data.frame(samp))) +
layer(panel.levelplot.points(x = Lng, y = Lat, z = value,
subscripts = TRUE, ..., cex = 1,
col.symbol = "transparent"),
## show translucent circles for all points (crosses for NAs)
grid.points(x = Lng, y = Lat,
pch = ifelse(is.na(value), 4, 1),
gp = gpar(cex = 1, alpha = 0.3, lwd = 0.2)),
data = as.data.frame(subpointsdf),
packets = 1) +
layer(sp.polygons(subpolydf, lwd = 0.5, alpha = 0.5), data = environment())
result
}
## horizonplot of time series (levelzoo)
## using all sites within the given polydf[subset,]
gmuHorizonplot <-
function(polydf, pointsdf, subset = NULL, ...,
levelzoo, ## e.g. vicinfo$levels
horizonscale = 5,
origin = function(y) mean(range(y, na.rm = TRUE)))
{
subpolydf <- polydf
subpointsdf <- pointsdf
if (!is.null(subset)) {
subpolydf <- polydf[subset,]
inside <- !is.na(overlay(pointsdf, subpolydf))
subpointsdf <- pointsdf[inside,]
}
if (nrow(subpointsdf) == 0)
stop("no values")
stopifnot(c("stationID", "longName") %in% colnames(pointsdf@data))
siteids <- as.character(subpointsdf@data$stationID)
sitenames <- as.character(subpointsdf@data$longName)
sitenames <- substr(sitenames, 1, 18)
ord <- order(siteids)
siteids <- siteids[ord]
sitenames <- sitenames[ord]
result <-
horizonplot(0 - levelzoo[,siteids], strip.left = FALSE,
horizonscale = horizonscale,
origin = origin,
colorkey = list(space = "left"), layout = c(1, NA),
scales = list(x = list(axs = "i")), xlab = NULL,
par.settings = list(axis.line = list(col = "gray50")),
ylab = list(rev(siteids), rot = 0, cex = 0.3),
ylab.right = list(sitenames, rot = 0, cex = 0.3))
update(result, ...)
}
## boxplots of distributions of changes over time (deltas)
## using all sites within the given polydf[subset,]
gmuDistributionTrend <-
function(polydf, pointsdf, subset = NULL, ...,
deltas) ## e.g. vicinfo$deltas
{
subpolydf <- polydf
subpointsdf <- pointsdf
if (!is.null(subset)) {
subpolydf <- polydf[subset,]
inside <- !is.na(overlay(pointsdf, subpolydf))
subpointsdf <- pointsdf[inside,]
}
if (nrow(subpointsdf) == 0)
stop("no values")
stopifnot(c("stationID", "longName") %in% colnames(pointsdf@data))
siteids <- as.character(subpointsdf@data$stationID)
sitenames <- as.character(subpointsdf@data$longName)
sitenames <- substr(sitenames, 1, 18)
ord <- order(siteids)
siteids <- siteids[ord]
sitenames <- sitenames[ord]
plot_distribution_trend(deltas[,siteids], ...)
}
## process the data for NSW
## TODO: reading in the whole table may not be feasible on some machines.
## It would probably be better to read in data for each site using SQL?
nsw <- sqlFetch(conn, "NSW")
## convert to zoo and remove obvious bad values
nswlist <-
llply(split(1:nrow(nsw), nsw$stationID),
.progress = myprogress("Splitting by site and converting to zoo objects"),
function(ii)
{
x <- nsw[ii,]
## remove zero values (from visual inspection this is necessary):
x <- subset(x, StandingWaterLevel != 0.0)
## TODO: negative values?
z <- suppressWarnings(zoo(x$StandingWaterLevel, x$observTime))
})
rm(nsw)
## remove series without any observations
nswlist <- nswlist[sapply(nswlist, function(x) sum(!is.na(coredata(x)))) > 0]
## remove all discontinued sites (those with no data after 2006)
nswlist <- nswlist[sapply(nswlist, end) >= as.POSIXct("2006-01-01")]
## distribution of start times
as.yearmon(quantile(do.call("c", lapply(nswlist, start))))
## distribution of end times
as.yearmon(quantile(do.call("c", lapply(nswlist, end))))
nswinfo <- gw_timeseries_process(nswlist, now = 2007)
save(nswinfo, file = "nswinfo.Rdata")
#load("nswinfo.Rdata")
## summary of changes each year (from current)
#summary(t(nswinfo$deltas))
## plot up some samples of the monthly + annual series, interactively
devAskNewPage(TRUE)
#pdf("nsw_samples.pdf", paper = "a4r", width = 18, height = 10)
for (i in head(seq(0, length(nswinfo$monthly), length = 10), -1)) {
print(update(xyplot(nswinfo$monthly[1:20 + i], type = "b", cex = 0.7),
scales = list(y = "sliced", x = "same")) +
xyplot(nswinfo$annual[1:20 + i], type = "s", col = "red", lwd = 2))
}
#dev.off()
devAskNewPage(FALSE)
plot_all_timeseries(nswinfo$monthly, ident = "nsw_monthly",
x.same = TRUE)
plot_all_timeseries(nswinfo$annual, ident = "nsw_annual", type = "s",
xlim = extendrange(c(1970,2010)))
## slow!
plot_all_timecoverage(nswinfo$monthly, ident = "nsw",
xlim = c(1970,2010))
## the real stuff:
do_groundwater_analysis(nswinfo, state = "NSW")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment