Skip to content

Instantly share code, notes, and snippets.

@rBatt
Created April 20, 2016 00:29
Show Gist options
  • Select an option

  • Save rBatt/821e2f21cb4d604824f5d31973933086 to your computer and use it in GitHub Desktop.

Select an option

Save rBatt/821e2f21cb4d604824f5d31973933086 to your computer and use it in GitHub Desktop.
detect a breakpoint
# ============
# = Packages =
# ============
# to do everything in this script, make sure the following packages are installed
# install.packages(c("rgenoud", "strucchange"))
# =============
# = Functions =
# =============
# this function makes predictions/ estimates
# biomass is the x value
# mu1 is the mean biomass before threshold is crossed
# mu2 is the adjustment to mu1 to be made after threshold biomass is crossed
# b_shift is the threshold in biomass
pred_func <- function(biomass, mu1, mu2, b_shift){
beta <- matrix(c(mu1, mu2), nrow=2)
X <- cbind(Intercept=1, meanShift=as.integer(biomass>b_shift))
c(X%*%beta)
}
# this function calculates
# residual sum of squares, or negative log likelihood
# calculation is for a given set of parameters, responses observations, and predictor observations
# an optimization function will guess at different values of params to minimize the output value (rss or nll)
# the params that minimize the output are the parameter estimates
# the uncertainty of those estimates can either be derived from the Hessian from the optimizer, bootstrapping
meanShift_nll <- function(params, y, x, fit_method=c("ols", "mle")){
method <- match.arg(fit_method) # makes ols default
stopifnot(all(is.finite(y)))
stopifnot(all(is.finite(x)))
stopifnot(all(is.finite(params)))
mu1 <- params[1]
mu2 <- params[2]
b_shift <- params[3]
if(method=="mle"){
sigma <- params[4]
}
y_hat <- pred_func(x, mu1, mu2, b_shift)
if(method=="ols"){
rss <- t(y-y_hat)%*%(y-y_hat) # same as sum((y-y_hat)^2)
val <- rss
}else if (method=="mle"){
nll <- -sum(dnorm(y, mean=y_hat, sd=sigma, log=TRUE))
# nll is same as
# -1*(-0.5*n*log(2*pi) -0.5*n*log(sigma*sigma) - (1/(2*sigma*sigma))*sum((error)^2))
# where error <- y-y_hat and n <- length(y)
val <- nll
}
return(val)
}
# ===========================
# = Simulation/ True Values =
# ===========================
N <- 100
biomass <- seq(1, 100, length.out=N)
mu1_true <- 1
mu2_true <- -0.5
b_shift_true <- mean(biomass)
sigma_true <- 0.25
y_true <- pred_func(biomass, mu1_true, mu2_true, b_shift_true)
y_obs <- y_true + rnorm(100, sd=sigma_true)
# ===========
# = Fitting =
# ===========
# ---- Fitting Setup ----
guesses <- c(mu1=0, mu2=0, b_shift=20, sigma=0.1) # starting values, if needed
# some fitting methods also permit putting bounds on parameters
# this can help a lot for getting reasonable estimates
exp_fact <- 1 # just used to create more conservative bounds on parameters; can change however
mean_extreme <- max(abs(y_obs))*exp_fact
mins <- c(-mean_extreme, -mean_extreme, min(biomass), 1E-5) # minimal guesses for parameters, using data
maxs <- c(mean_extreme, mean_extreme, max(biomass), 1E5) # max guesses
dom <- cbind(mins, maxs) # matrix format
# ---- Do Fitting with Base R package ----
# fit ols
opt_ols <- optim(guesses[1:3], meanShift_nll, y=y_obs, x=biomass, fit_method="ols", lower=mins, upper=maxs, method="L-BFGS-B")
# fit mle
opt_mle <- optim(guesses, meanShift_nll, y=y_obs, x=biomass, fit_method="mle", lower=mins, upper=maxs, method="L-BFGS-B")
# ---- Do Fitting with rgenoud Package ----
library("rgenoud")
# fit ols
gen_ols <- rgenoud::genoud(meanShift_nll, nvars=3, y=y_obs, x=biomass, fit_method="ols", Domain=dom[1:3,], print.level=1)
# gen_ols <- rgenoud::genoud(meanShift_nll, nvars=3, y=y_obs, x=biomass, fit_method="ols", Domain=dom[1:3,], print.level=1, pop.size=2E3, solution.tolerance=0.1)
# fit mle
gen_mle <- rgenoud::genoud(meanShift_nll, nvars=4, y=y_obs, x=biomass, fit_method="mle", Domain=dom, print.level=1)
# gen_mle <- rgenoud::genoud(meanShift_nll, nvars=4, y=y_obs, x=biomass, fit_method="mle", Domain=dom, print.level=1, pop.size=2E3, solution.tolerance=0.1)
# ==========================
# = Compare Results of Fit =
# ==========================
# ---- Use Fits to Get Predictions ----
opt_ols_hat <- pred_func(biomass, opt_ols$par[1], opt_ols$par[2], opt_ols$par[3])
opt_mle_hat <- pred_func(biomass, opt_mle$par[1], opt_mle$par[2], opt_mle$par[3])
gen_ols_hat <- pred_func(biomass, gen_ols$par[1], gen_ols$par[2], gen_ols$par[3])
gen_mle_hat <- pred_func(biomass, gen_mle$par[1], gen_mle$par[2], gen_mle$par[3])
# ---- Plot Observations and Fits ----
ylim <- range(c(y_obs, y_true, opt_ols_hat, opt_mle_hat, gen_ols_hat, gen_mle_hat))
cols2use <- c("pink", "red", "lightblue", "blue")
plot(y_obs, ylim=ylim, xlab="biomass", ylab="response")
lines(opt_ols_hat, col=cols2use[1], lwd=6)
lines(opt_mle_hat, col=cols2use[2])
lines(gen_ols_hat, col=cols2use[3], lwd=6)
lines(gen_mle_hat, col=cols2use[4])
legend("topright", legend=c("optim ols","optim mle", "genoud ols", "genoud mle"), lty=1, lwd=c(6,1,6,1), col=cols2use)
# ===================
# = Use strucchange =
# ===================
library("strucchange")
dat <- data.frame(y_obs, biomass)
sc <- breakpoints(y_obs~1, data=dat, breaks=1)
coef(sc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment