Created
April 20, 2016 00:29
-
-
Save rBatt/821e2f21cb4d604824f5d31973933086 to your computer and use it in GitHub Desktop.
detect a breakpoint
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # ============ | |
| # = Packages = | |
| # ============ | |
| # to do everything in this script, make sure the following packages are installed | |
| # install.packages(c("rgenoud", "strucchange")) | |
| # ============= | |
| # = Functions = | |
| # ============= | |
| # this function makes predictions/ estimates | |
| # biomass is the x value | |
| # mu1 is the mean biomass before threshold is crossed | |
| # mu2 is the adjustment to mu1 to be made after threshold biomass is crossed | |
| # b_shift is the threshold in biomass | |
| pred_func <- function(biomass, mu1, mu2, b_shift){ | |
| beta <- matrix(c(mu1, mu2), nrow=2) | |
| X <- cbind(Intercept=1, meanShift=as.integer(biomass>b_shift)) | |
| c(X%*%beta) | |
| } | |
| # this function calculates | |
| # residual sum of squares, or negative log likelihood | |
| # calculation is for a given set of parameters, responses observations, and predictor observations | |
| # an optimization function will guess at different values of params to minimize the output value (rss or nll) | |
| # the params that minimize the output are the parameter estimates | |
| # the uncertainty of those estimates can either be derived from the Hessian from the optimizer, bootstrapping | |
| meanShift_nll <- function(params, y, x, fit_method=c("ols", "mle")){ | |
| method <- match.arg(fit_method) # makes ols default | |
| stopifnot(all(is.finite(y))) | |
| stopifnot(all(is.finite(x))) | |
| stopifnot(all(is.finite(params))) | |
| mu1 <- params[1] | |
| mu2 <- params[2] | |
| b_shift <- params[3] | |
| if(method=="mle"){ | |
| sigma <- params[4] | |
| } | |
| y_hat <- pred_func(x, mu1, mu2, b_shift) | |
| if(method=="ols"){ | |
| rss <- t(y-y_hat)%*%(y-y_hat) # same as sum((y-y_hat)^2) | |
| val <- rss | |
| }else if (method=="mle"){ | |
| nll <- -sum(dnorm(y, mean=y_hat, sd=sigma, log=TRUE)) | |
| # nll is same as | |
| # -1*(-0.5*n*log(2*pi) -0.5*n*log(sigma*sigma) - (1/(2*sigma*sigma))*sum((error)^2)) | |
| # where error <- y-y_hat and n <- length(y) | |
| val <- nll | |
| } | |
| return(val) | |
| } | |
| # =========================== | |
| # = Simulation/ True Values = | |
| # =========================== | |
| N <- 100 | |
| biomass <- seq(1, 100, length.out=N) | |
| mu1_true <- 1 | |
| mu2_true <- -0.5 | |
| b_shift_true <- mean(biomass) | |
| sigma_true <- 0.25 | |
| y_true <- pred_func(biomass, mu1_true, mu2_true, b_shift_true) | |
| y_obs <- y_true + rnorm(100, sd=sigma_true) | |
| # =========== | |
| # = Fitting = | |
| # =========== | |
| # ---- Fitting Setup ---- | |
| guesses <- c(mu1=0, mu2=0, b_shift=20, sigma=0.1) # starting values, if needed | |
| # some fitting methods also permit putting bounds on parameters | |
| # this can help a lot for getting reasonable estimates | |
| exp_fact <- 1 # just used to create more conservative bounds on parameters; can change however | |
| mean_extreme <- max(abs(y_obs))*exp_fact | |
| mins <- c(-mean_extreme, -mean_extreme, min(biomass), 1E-5) # minimal guesses for parameters, using data | |
| maxs <- c(mean_extreme, mean_extreme, max(biomass), 1E5) # max guesses | |
| dom <- cbind(mins, maxs) # matrix format | |
| # ---- Do Fitting with Base R package ---- | |
| # fit ols | |
| opt_ols <- optim(guesses[1:3], meanShift_nll, y=y_obs, x=biomass, fit_method="ols", lower=mins, upper=maxs, method="L-BFGS-B") | |
| # fit mle | |
| opt_mle <- optim(guesses, meanShift_nll, y=y_obs, x=biomass, fit_method="mle", lower=mins, upper=maxs, method="L-BFGS-B") | |
| # ---- Do Fitting with rgenoud Package ---- | |
| library("rgenoud") | |
| # fit ols | |
| gen_ols <- rgenoud::genoud(meanShift_nll, nvars=3, y=y_obs, x=biomass, fit_method="ols", Domain=dom[1:3,], print.level=1) | |
| # gen_ols <- rgenoud::genoud(meanShift_nll, nvars=3, y=y_obs, x=biomass, fit_method="ols", Domain=dom[1:3,], print.level=1, pop.size=2E3, solution.tolerance=0.1) | |
| # fit mle | |
| gen_mle <- rgenoud::genoud(meanShift_nll, nvars=4, y=y_obs, x=biomass, fit_method="mle", Domain=dom, print.level=1) | |
| # gen_mle <- rgenoud::genoud(meanShift_nll, nvars=4, y=y_obs, x=biomass, fit_method="mle", Domain=dom, print.level=1, pop.size=2E3, solution.tolerance=0.1) | |
| # ========================== | |
| # = Compare Results of Fit = | |
| # ========================== | |
| # ---- Use Fits to Get Predictions ---- | |
| opt_ols_hat <- pred_func(biomass, opt_ols$par[1], opt_ols$par[2], opt_ols$par[3]) | |
| opt_mle_hat <- pred_func(biomass, opt_mle$par[1], opt_mle$par[2], opt_mle$par[3]) | |
| gen_ols_hat <- pred_func(biomass, gen_ols$par[1], gen_ols$par[2], gen_ols$par[3]) | |
| gen_mle_hat <- pred_func(biomass, gen_mle$par[1], gen_mle$par[2], gen_mle$par[3]) | |
| # ---- Plot Observations and Fits ---- | |
| ylim <- range(c(y_obs, y_true, opt_ols_hat, opt_mle_hat, gen_ols_hat, gen_mle_hat)) | |
| cols2use <- c("pink", "red", "lightblue", "blue") | |
| plot(y_obs, ylim=ylim, xlab="biomass", ylab="response") | |
| lines(opt_ols_hat, col=cols2use[1], lwd=6) | |
| lines(opt_mle_hat, col=cols2use[2]) | |
| lines(gen_ols_hat, col=cols2use[3], lwd=6) | |
| lines(gen_mle_hat, col=cols2use[4]) | |
| legend("topright", legend=c("optim ols","optim mle", "genoud ols", "genoud mle"), lty=1, lwd=c(6,1,6,1), col=cols2use) | |
| # =================== | |
| # = Use strucchange = | |
| # =================== | |
| library("strucchange") | |
| dat <- data.frame(y_obs, biomass) | |
| sc <- breakpoints(y_obs~1, data=dat, breaks=1) | |
| coef(sc) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment