timcdlucas · October 4, 2019 09:28
diff --git a/dist_to_point.R b/dist_to_point.R

 #'---
 #'output:
 #'  pdf_document:
 #'    number_sections: true
 #'    toc: true
 #'    toc_depth: 2
 #'title: "RandomForest with distance to points"
 #'author: Tim Lucas
 #'fontsize: 8pt
 #'geometry: margin=0.5in
 #'---


 #+ libs

 library(caret)
 library(INLA)
 library(ggplot2)

 #+ simulate_data

 data <- data.frame(x = seq(0, 1, length.out = 200),
                   train = rep(c(T, F), each = 100))

 data$y <- sin(data$x * 10) + rnorm(200, 0, 0.2)


 #+ first_look

 ggplot(data, aes(x, y, colour = train)) + 
  geom_point()



 #+ fit_gp

 INLA:::inla.dynload.workaround()

 mesh <- inla.mesh.1d(data$x, degree = 2)

 sigma0 = 0.1
 kappa0 = 5
 tau0 = 1 / (4 * kappa0^3 * sigma0^2)^0.5

 spde = inla.spde2.matern(mesh, constr = FALSE,
                         B.tau = cbind(log(tau0), 1),
                         B.kappa = cbind(log(kappa0), 0),
                         theta.prior.prec = 1e-4)

 A = inla.spde.make.A(mesh, loc = data$x[data$train])
 x.index = inla.spde.make.index("x", n.spde = spde$n.spde)
 stack = inla.stack(data = list(y = data$y[data$train]),
                   A = list(A),
                   effects = list(x.index),
                   tag = "est")

 Apred = inla.spde.make.A(mesh, loc = data$x[!data$train])
 stack_pred = inla.stack(data = list(y = rep(NA, length = 100)),
                       A = list(Apred),
                       effects = list(x.index),
                       tag = "pred")

 stacks = inla.stack.join(stack, stack_pred)


 formula = y ~ -1 + f(x, model = spde)

 dat <- inla.stack.data(stacks)

 result = inla(formula, family = "gaussian", data = dat,
               control.predictor = list(A = inla.stack.A(stacks),
                                        link = 1,
                                        compute = TRUE), 
              verbose = FALSE)

 autoplot(result)
 result$summary.fitted.values[1:200, 'mean'] %>% plot


 data$inla_pred <- result$summary.fitted.values[1:200, 'mean']

 #+ plot_gp

 ggplot(data, aes(x, y, colour = train)) + 
  geom_point() +
  geom_line(aes(y = inla_pred), colour = 'red')




 #+ fit_rf

 rf1 <- train(y = data$y[1:100], x = data[1:100, 'x', drop = FALSE],
             method = 'rf',
             tuneLength = 1,
             trControl = trainControl(number = 3))

 data$rf1 <- predict(rf1, newdata = data)


 #+ plot_rf1
 ggplot(data, aes(x, y, colour = train)) + 
  geom_point() +
  geom_line(aes(y = inla_pred), colour = 'red') +
  geom_line(aes(y = rf1), colour = 'blue')



 #+ fit_rf_dist

 xdist <- as.matrix(dist(data$x))


 rf2 <- train(y = data$y[1:100], x = xdist[1:100, 1:100],
             method = 'rf',
             tuneLength = 5,
             trControl = trainControl(number = 3))

 data$rf2 <- predict(rf2, newdata = xdist[, 1:100])


 #+ plot_rf2
 ggplot(data, aes(x, y, colour = train)) + 
  geom_point() +
  geom_line(aes(y = inla_pred), colour = 'red') +
  geom_line(aes(y = rf1), colour = 'blue')  +
  geom_line(aes(y = rf2), colour = 'purple') +
  ggtitle('Red: gp, blue: rf, purple: rf_distance_topoints')

	#'---
	#'output:
	#' pdf_document:
	#' number_sections: true
	#' toc: true
	#' toc_depth: 2
	#'title: "RandomForest with distance to points"
	#'author: Tim Lucas
	#'fontsize: 8pt
	#'geometry: margin=0.5in
	#'---


	#+ libs

	library(caret)
	library(INLA)
	library(ggplot2)

	#+ simulate_data

	data <- data.frame(x = seq(0, 1, length.out = 200),
	train = rep(c(T, F), each = 100))

	data$y <- sin(data$x * 10) + rnorm(200, 0, 0.2)


	#+ first_look

	ggplot(data, aes(x, y, colour = train)) +
	geom_point()



	#+ fit_gp

	INLA:::inla.dynload.workaround()

	mesh <- inla.mesh.1d(data$x, degree = 2)

	sigma0 = 0.1
	kappa0 = 5
	tau0 = 1 / (4 * kappa0^3 * sigma0^2)^0.5

	spde = inla.spde2.matern(mesh, constr = FALSE,
	B.tau = cbind(log(tau0), 1),
	B.kappa = cbind(log(kappa0), 0),
	theta.prior.prec = 1e-4)

	A = inla.spde.make.A(mesh, loc = data$x[data$train])
	x.index = inla.spde.make.index("x", n.spde = spde$n.spde)
	stack = inla.stack(data = list(y = data$y[data$train]),
	A = list(A),
	effects = list(x.index),
	tag = "est")

	Apred = inla.spde.make.A(mesh, loc = data$x[!data$train])
	stack_pred = inla.stack(data = list(y = rep(NA, length = 100)),
	A = list(Apred),
	effects = list(x.index),
	tag = "pred")

	stacks = inla.stack.join(stack, stack_pred)


	formula = y ~ -1 + f(x, model = spde)

	dat <- inla.stack.data(stacks)

	result = inla(formula, family = "gaussian", data = dat,
	control.predictor = list(A = inla.stack.A(stacks),
	link = 1,
	compute = TRUE),
	verbose = FALSE)

	autoplot(result)
	result$summary.fitted.values[1:200, 'mean'] %>% plot


	data$inla_pred <- result$summary.fitted.values[1:200, 'mean']

	#+ plot_gp

	ggplot(data, aes(x, y, colour = train)) +
	geom_point() +
	geom_line(aes(y = inla_pred), colour = 'red')




	#+ fit_rf

	rf1 <- train(y = data$y[1:100], x = data[1:100, 'x', drop = FALSE],
	method = 'rf',
	tuneLength = 1,
	trControl = trainControl(number = 3))

	data$rf1 <- predict(rf1, newdata = data)


	#+ plot_rf1
	ggplot(data, aes(x, y, colour = train)) +
	geom_point() +
	geom_line(aes(y = inla_pred), colour = 'red') +
	geom_line(aes(y = rf1), colour = 'blue')



	#+ fit_rf_dist

	xdist <- as.matrix(dist(data$x))


	rf2 <- train(y = data$y[1:100], x = xdist[1:100, 1:100],
	method = 'rf',
	tuneLength = 5,
	trControl = trainControl(number = 3))

	data$rf2 <- predict(rf2, newdata = xdist[, 1:100])


	#+ plot_rf2
	ggplot(data, aes(x, y, colour = train)) +
	geom_point() +
	geom_line(aes(y = inla_pred), colour = 'red') +
	geom_line(aes(y = rf1), colour = 'blue') +
	geom_line(aes(y = rf2), colour = 'purple') +
	ggtitle('Red: gp, blue: rf, purple: rf_distance_topoints')