Skip to content

Instantly share code, notes, and snippets.

@timcdlucas
Created October 4, 2019 09:28
Show Gist options
  • Save timcdlucas/c5c4d2fb2ef957bde5914ab2e583676a to your computer and use it in GitHub Desktop.
Save timcdlucas/c5c4d2fb2ef957bde5914ab2e583676a to your computer and use it in GitHub Desktop.
A markdown script for looking at geographic random forests.
#'---
#'output:
#' pdf_document:
#' number_sections: true
#' toc: true
#' toc_depth: 2
#'title: "RandomForest with distance to points"
#'author: Tim Lucas
#'fontsize: 8pt
#'geometry: margin=0.5in
#'---
#+ libs
library(caret)
library(INLA)
library(ggplot2)
#+ simulate_data
data <- data.frame(x = seq(0, 1, length.out = 200),
train = rep(c(T, F), each = 100))
data$y <- sin(data$x * 10) + rnorm(200, 0, 0.2)
#+ first_look
ggplot(data, aes(x, y, colour = train)) +
geom_point()
#+ fit_gp
INLA:::inla.dynload.workaround()
mesh <- inla.mesh.1d(data$x, degree = 2)
sigma0 = 0.1
kappa0 = 5
tau0 = 1 / (4 * kappa0^3 * sigma0^2)^0.5
spde = inla.spde2.matern(mesh, constr = FALSE,
B.tau = cbind(log(tau0), 1),
B.kappa = cbind(log(kappa0), 0),
theta.prior.prec = 1e-4)
A = inla.spde.make.A(mesh, loc = data$x[data$train])
x.index = inla.spde.make.index("x", n.spde = spde$n.spde)
stack = inla.stack(data = list(y = data$y[data$train]),
A = list(A),
effects = list(x.index),
tag = "est")
Apred = inla.spde.make.A(mesh, loc = data$x[!data$train])
stack_pred = inla.stack(data = list(y = rep(NA, length = 100)),
A = list(Apred),
effects = list(x.index),
tag = "pred")
stacks = inla.stack.join(stack, stack_pred)
formula = y ~ -1 + f(x, model = spde)
dat <- inla.stack.data(stacks)
result = inla(formula, family = "gaussian", data = dat,
control.predictor = list(A = inla.stack.A(stacks),
link = 1,
compute = TRUE),
verbose = FALSE)
autoplot(result)
result$summary.fitted.values[1:200, 'mean'] %>% plot
data$inla_pred <- result$summary.fitted.values[1:200, 'mean']
#+ plot_gp
ggplot(data, aes(x, y, colour = train)) +
geom_point() +
geom_line(aes(y = inla_pred), colour = 'red')
#+ fit_rf
rf1 <- train(y = data$y[1:100], x = data[1:100, 'x', drop = FALSE],
method = 'rf',
tuneLength = 1,
trControl = trainControl(number = 3))
data$rf1 <- predict(rf1, newdata = data)
#+ plot_rf1
ggplot(data, aes(x, y, colour = train)) +
geom_point() +
geom_line(aes(y = inla_pred), colour = 'red') +
geom_line(aes(y = rf1), colour = 'blue')
#+ fit_rf_dist
xdist <- as.matrix(dist(data$x))
rf2 <- train(y = data$y[1:100], x = xdist[1:100, 1:100],
method = 'rf',
tuneLength = 5,
trControl = trainControl(number = 3))
data$rf2 <- predict(rf2, newdata = xdist[, 1:100])
#+ plot_rf2
ggplot(data, aes(x, y, colour = train)) +
geom_point() +
geom_line(aes(y = inla_pred), colour = 'red') +
geom_line(aes(y = rf1), colour = 'blue') +
geom_line(aes(y = rf2), colour = 'purple') +
ggtitle('Red: gp, blue: rf, purple: rf_distance_topoints')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment