Created
August 29, 2014 21:16
-
-
Save emhart/62cd4ee4034ceb2fc363 to your computer and use it in GitHub Desktop.
for Iain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MASS) | |
library(lars) | |
library(lavaan) | |
####Code to wrap lasso and extra CV... | |
### Taken from: http://www4.stat.ncsu.edu/~boos/var.select/lasso.dr1.txt | |
lasso.dr1<-function(x,y,kfold=5,rep=10,plot=T,grid=seq(from=0,to=1,length=100)){ | |
# wrapper for lasso in lars, with modifications to cv.lars | |
require(lars) | |
# set.seed(seed) | |
ok<-complete.cases(x,y) | |
x<-x[ok,] # get rid of na's | |
y<-y[ok] # since regsubsets can't handle na's | |
m<-ncol(x) | |
n<-nrow(x) | |
as.matrix(x)->x | |
cvout<-cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid) | |
sumcv<-cvout$cv | |
frac<-cvout$fraction | |
if(rep >1){ | |
for(i in 1:(rep-1)){ | |
sumcv<-sumcv+cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)$cv | |
} # ends for | |
} # ends if | |
sumcv<-sumcv/rep | |
if(plot) plot(frac,sumcv) | |
sfrac<-frac[which.min(sumcv)] # assume frac stays the same | |
object<-lars(x,y,type="lasso") | |
fit <- predict.lars(object,x,s=sfrac,type="fit",mode="fraction")$fit | |
coeff<-predict.lars(object,x,s=sfrac,type="coef",mode="fraction")$coefficients | |
st<-sum(coeff !=0) # number nonzero | |
mse<-sum((y-fit)^2)/(n-st-1) | |
colnames(x)<-colnames(x,do.NULL=F,prefix="") # corrects for no colnames | |
x<-x[,colnames(x)[which(coeff !=0)]] | |
cat("",fill=T) | |
cat("K (num. of folds)=",kfold,fill=T) | |
cat("Num. of reps)=",rep,fill=T) | |
cat("Fraction from CV=",sfrac,fill=T) | |
cat("number nonzero=",st,fill=T) | |
cat("mse=",mse,fill=T) | |
cat("Variables Selected =",colnames(x),fill=T) | |
return(list(l.object=object,frac=frac,fit=fit,coeff=coeff,st=st,mse=mse,sumcv=sumcv)) | |
} | |
### taken from: http://www4.stat.ncsu.edu/~boos/var.select/cv.lars1.txt | |
cv.lars1<- function (x, y, K = 10, fraction = seq(from = 0, to = 1, length = 100), trace = FALSE, plot.it = TRUE, se = TRUE) { | |
# Howard Bondell modification of cv.lars | |
### add this code | |
full.fit <- lars(x, y, trace = trace) | |
betas <- full.fit$beta | |
sbetas <- scale(betas, FALSE, 1/full.fit$normx) | |
kp <- dim(betas) | |
k <- kp[1] | |
p <- kp[2] | |
norm.bound <- fraction*(abs(sbetas[k,]) %*% rep(1, p)) | |
### | |
all.folds <- cv.folds(length(y), K) | |
residmat <- matrix(0, length(fraction), K) | |
for (i in seq(K)) { | |
omit <- all.folds[[i]] | |
fit <- lars(x[-omit, ], y[-omit], trace = trace) | |
### fit <- predict(fit, x[omit, , drop = FALSE], mode = "fraction", | |
### s = fraction)$fit | |
### Replace this line with | |
fit <- predict.lars1(fit, x[omit, , drop = FALSE], mode = "norm", | |
s = norm.bound)$fit | |
### | |
if (length(omit) == 1) | |
fit <- matrix(fit, nrow = 1) | |
residmat[, i] <- apply((y[omit] - fit)^2, 2, mean) | |
if (trace) | |
cat("\n CV Fold", i, "\n\n") | |
} | |
cv <- apply(residmat, 1, mean) | |
cv.error <- sqrt(apply(residmat, 1, var)/K) | |
object <- list(fraction = fraction, cv = cv, cv.error = cv.error) | |
if (plot.it) | |
plotCVLars(object, se = se) | |
invisible(object) | |
} | |
predict.lars1 <- function (object, newx, s, type = c("fit", "coefficients"), mode = c("step", "fraction", "norm")) { | |
mode <- match.arg(mode) | |
type <- match.arg(type) | |
if (missing(newx) & type == "fit") { | |
warning("Type=fit with no newx argument; type switched to coefficients") | |
type <- "coefficients" | |
} | |
betas <- object$beta | |
sbetas <- scale(betas, FALSE, 1/object$normx) | |
kp <- dim(betas) | |
k <- kp[1] | |
p <- kp[2] | |
steps <- seq(k) | |
if (missing(s)) { | |
s <- steps | |
mode <- "step" | |
} | |
sbeta <- switch(mode, step = { | |
if (any(s < 0) | any(s > k)) | |
stop("Argument s out of range") | |
steps | |
}, fraction = { | |
if (any(s > 1) | any(s < 0)) | |
stop("Argument s out of range") | |
nbeta <- drop(abs(sbetas) %*% rep(1, p)) | |
nbeta/nbeta[k] | |
### This part is changed | |
}, norm = { | |
nbeta <- drop(abs(sbetas) %*% rep(1, p)) | |
if (any(s < 0)) | |
stop("Argument s out of range") | |
nbeta | |
}) | |
s[s>nbeta[k]] <- nbeta[k] | |
### | |
sfrac <- (s - sbeta[1])/(sbeta[k] - sbeta[1]) | |
sbeta <- (sbeta - sbeta[1])/(sbeta[k] - sbeta[1]) | |
usbeta <- unique(sbeta) | |
useq <- match(usbeta, sbeta) | |
sbeta <- sbeta[useq] | |
betas <- betas[useq, ] | |
coord <- approx(sbeta, seq(sbeta), sfrac)$y | |
left <- floor(coord) | |
right <- ceiling(coord) | |
newbetas <- ((sbeta[right] - sfrac) * betas[left, , drop = FALSE] + | |
(sfrac - sbeta[left]) * betas[right, , drop = FALSE])/(sbeta[right] - | |
sbeta[left]) | |
newbetas[left == right, ] <- betas[left[left == right], ] | |
robject <- switch(type, coefficients = list(s = s, fraction = sfrac, | |
mode = mode, coefficients = drop(newbetas)), fit = list(s = s, | |
fraction = sfrac, mode = mode, fit = drop(scale(newx, | |
object$meanx, FALSE) %*% t(newbetas)) + object$mu)) | |
robject | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment