Last active
April 28, 2021 15:43
-
-
Save gavinsimpson/6d05af9186b9f9419cca5a4507af3aa0 to your computer and use it in GitHub Desktop.
R code to download, extract, and fit a Tweedie GAM to monthly rainfall total time series from Nuuk, Greenland, using mgcv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Process precip data for Nuuk | |
## Packages | |
library("tibble") | |
library("readr") | |
library("tidyr") | |
library("curl") | |
## Data - data are at ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/v2/v2.prcp.Z | |
## file is compressed | |
url <- "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/v2/v2.prcp.Z" | |
tmp <- "./v2.prcp.Z" | |
curl_download(url, tmp, quiet = FALSE) | |
## At this point you'll need to uncompress the x-compress file you just downloaded, | |
## and extract the `v2.prcp` file to the working (or other) directory. | |
## There is an R package `uncompress` that could do this for you, but it was removed | |
## from CRAN at some point; the last version archived there is from 2012. | |
path.2.v2.prcp <- "./v2.prcp" | |
## data have 16 chars, then twelve groups of 5 chars | |
## the first 16 chars contain relevant info and need extracting into separate variables on reading | |
precip <- read_fwf(path.2.v2.prcp, | |
fwf_widths(c(3, 5, 3, 1, 4, rep(5, 12)), | |
col_names = c("Country","WMOID","Modifier","Duplicate","Year", month.abb)), | |
na = c("-9999","-8888")) # -8888 means a trace, which I ignore (naughty!) | |
# -9999 is straight up missing | |
## subset only the bits we want | |
precip <- precip[precip$WMOID == 4250L, ] # 4250 is the WMO station ID for Nuuk (Godthaab) | |
## gather the month cols into a single key value pair | |
precip <- gather(precip, Month, Precip, -Country, -WMOID, -Modifier, -Duplicate, -Year) | |
## add a numeric column for month | |
months <- 1:12 | |
names(months) <- month.abb | |
precip <- add_column(precip, nMonth = unname(months[precip$Month]), .after = "Month") | |
## Drop some other stuff | |
precip <- precip[, c("WMOID", "Year", "Month", "nMonth", "Precip")] | |
## Save this to RDS | |
write_rds(precip, "./nuuk-precip-data.rds") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load and analyse precip data for Nuuk | |
## Packages | |
library("mgcv") | |
library("ggplot2") | |
library("viridis") | |
theme_set(theme_bw()) | |
## Data - data are in ./nuuk-precip-data.rds | |
## This file can be created by running ./data-prep.R *interactively* | |
precip <- readRDS("./nuuk-precip-data.rds") | |
## gam.control object | |
ctrl <- gam.control() | |
## ctrl <- gam.control(nthreads = 4) # uncomment if you can use multiple threads | |
## This coerces precip to a data.frame - no tibbles from now on | |
precip <- transform(precip, Precip = Precip / 10) # Precip data are in 1/10 of a mm | |
## model will be a Tweedie GAM | |
## te() version as interaction required | |
m <- gam(Precip ~ te(nMonth, Year, bs = c("cc", "tp"), k = c(12, 35)), | |
data = precip, knots = list(nMonth = c(0.5, 12.5)), | |
family = tw, method = "REML", select = TRUE, control = ctrl) | |
summary(m) | |
gam.check(m) | |
plot(m, scheme = 2, n = 200, tran = family(m)$linkinv) | |
## Look at the fitted model | |
ilink <- family(m)$linkinv # this is the inverse of the link function, as an R function | |
## Generate new data to predict at; to get smooth plots use ~ 100 evenly spaced values for each covariate | |
pdata <- with(precip, expand.grid(Year = seq(min(Year), max(Year), length.out = 100), | |
nMonth = seq(1, 12, length.out = 100))) | |
## Add on the fitted values and SEs *on the log [link] scale* | |
pdata <- cbind(pdata, as.data.frame(predict(m, newdata = pdata, type = "link", se.fit = TRUE))) | |
## Compute 95% confidence interval on link scale & back transform this & model fit to response scale | |
pdata <- transform(pdata, | |
Upper = ilink(fit + (1.96 * se.fit)), | |
Lower = ilink(fit - (1.96 * se.fit)), | |
Fitted = ilink(fit)) | |
## plot | |
p1 <- ggplot(pdata, aes(x = nMonth, y = Fitted, colour = Year, group = factor(Year))) + | |
geom_line() + | |
scale_color_viridis(option = "plasma", end = 0.95) + | |
labs(title = "Modelled change in monthly total precipitation at Nuuk, Greenland, 1874–2016", | |
subtitle = "Increase in rainfall & marked change in within-year distribution", | |
caption = "Data: www.ncdc.noaa.gov/ghcnm/v2.php", | |
x = NULL, | |
y = "Estimated total precipitation (mm)") + | |
scale_x_continuous(breaks = 1:12, minor_breaks = NULL, labels = month.abb) | |
p1 | |
ggsave("modelled-nuuk-rainfall.png", p1, dpi = 100, width = 8.5, height = 5.5) |
Hello, I cannot download the data, does it still exist?
@AliciaHamer The data appear to be here ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/v2/ with different naming conventions but perhaps a nicer compression system such that we can open them directly from within R. If you try the new source and it doesn't work (i.e. the internal file structure has changed), let me know and I'll take a look at updating this to provide working code.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Awesome. Thank you very much for the explanation!