Skip to content

Instantly share code, notes, and snippets.

@emhart
Created August 29, 2014 21:16
Show Gist options
  • Save emhart/62cd4ee4034ceb2fc363 to your computer and use it in GitHub Desktop.
Save emhart/62cd4ee4034ceb2fc363 to your computer and use it in GitHub Desktop.
for Iain
library(MASS)
library(lars)
library(lavaan)
####Code to wrap lasso and extra CV...
### Taken from: http://www4.stat.ncsu.edu/~boos/var.select/lasso.dr1.txt
lasso.dr1<-function(x,y,kfold=5,rep=10,plot=T,grid=seq(from=0,to=1,length=100)){
# wrapper for lasso in lars, with modifications to cv.lars
require(lars)
# set.seed(seed)
ok<-complete.cases(x,y)
x<-x[ok,] # get rid of na's
y<-y[ok] # since regsubsets can't handle na's
m<-ncol(x)
n<-nrow(x)
as.matrix(x)->x
cvout<-cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)
sumcv<-cvout$cv
frac<-cvout$fraction
if(rep >1){
for(i in 1:(rep-1)){
sumcv<-sumcv+cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)$cv
} # ends for
} # ends if
sumcv<-sumcv/rep
if(plot) plot(frac,sumcv)
sfrac<-frac[which.min(sumcv)] # assume frac stays the same
object<-lars(x,y,type="lasso")
fit <- predict.lars(object,x,s=sfrac,type="fit",mode="fraction")$fit
coeff<-predict.lars(object,x,s=sfrac,type="coef",mode="fraction")$coefficients
st<-sum(coeff !=0) # number nonzero
mse<-sum((y-fit)^2)/(n-st-1)
colnames(x)<-colnames(x,do.NULL=F,prefix="") # corrects for no colnames
x<-x[,colnames(x)[which(coeff !=0)]]
cat("",fill=T)
cat("K (num. of folds)=",kfold,fill=T)
cat("Num. of reps)=",rep,fill=T)
cat("Fraction from CV=",sfrac,fill=T)
cat("number nonzero=",st,fill=T)
cat("mse=",mse,fill=T)
cat("Variables Selected =",colnames(x),fill=T)
return(list(l.object=object,frac=frac,fit=fit,coeff=coeff,st=st,mse=mse,sumcv=sumcv))
}
### taken from: http://www4.stat.ncsu.edu/~boos/var.select/cv.lars1.txt
cv.lars1<- function (x, y, K = 10, fraction = seq(from = 0, to = 1, length = 100), trace = FALSE, plot.it = TRUE, se = TRUE) {
# Howard Bondell modification of cv.lars
### add this code
full.fit <- lars(x, y, trace = trace)
betas <- full.fit$beta
sbetas <- scale(betas, FALSE, 1/full.fit$normx)
kp <- dim(betas)
k <- kp[1]
p <- kp[2]
norm.bound <- fraction*(abs(sbetas[k,]) %*% rep(1, p))
###
all.folds <- cv.folds(length(y), K)
residmat <- matrix(0, length(fraction), K)
for (i in seq(K)) {
omit <- all.folds[[i]]
fit <- lars(x[-omit, ], y[-omit], trace = trace)
### fit <- predict(fit, x[omit, , drop = FALSE], mode = "fraction",
### s = fraction)$fit
### Replace this line with
fit <- predict.lars1(fit, x[omit, , drop = FALSE], mode = "norm",
s = norm.bound)$fit
###
if (length(omit) == 1)
fit <- matrix(fit, nrow = 1)
residmat[, i] <- apply((y[omit] - fit)^2, 2, mean)
if (trace)
cat("\n CV Fold", i, "\n\n")
}
cv <- apply(residmat, 1, mean)
cv.error <- sqrt(apply(residmat, 1, var)/K)
object <- list(fraction = fraction, cv = cv, cv.error = cv.error)
if (plot.it)
plotCVLars(object, se = se)
invisible(object)
}
predict.lars1 <- function (object, newx, s, type = c("fit", "coefficients"), mode = c("step", "fraction", "norm")) {
mode <- match.arg(mode)
type <- match.arg(type)
if (missing(newx) & type == "fit") {
warning("Type=fit with no newx argument; type switched to coefficients")
type <- "coefficients"
}
betas <- object$beta
sbetas <- scale(betas, FALSE, 1/object$normx)
kp <- dim(betas)
k <- kp[1]
p <- kp[2]
steps <- seq(k)
if (missing(s)) {
s <- steps
mode <- "step"
}
sbeta <- switch(mode, step = {
if (any(s < 0) | any(s > k))
stop("Argument s out of range")
steps
}, fraction = {
if (any(s > 1) | any(s < 0))
stop("Argument s out of range")
nbeta <- drop(abs(sbetas) %*% rep(1, p))
nbeta/nbeta[k]
### This part is changed
}, norm = {
nbeta <- drop(abs(sbetas) %*% rep(1, p))
if (any(s < 0))
stop("Argument s out of range")
nbeta
})
s[s>nbeta[k]] <- nbeta[k]
###
sfrac <- (s - sbeta[1])/(sbeta[k] - sbeta[1])
sbeta <- (sbeta - sbeta[1])/(sbeta[k] - sbeta[1])
usbeta <- unique(sbeta)
useq <- match(usbeta, sbeta)
sbeta <- sbeta[useq]
betas <- betas[useq, ]
coord <- approx(sbeta, seq(sbeta), sfrac)$y
left <- floor(coord)
right <- ceiling(coord)
newbetas <- ((sbeta[right] - sfrac) * betas[left, , drop = FALSE] +
(sfrac - sbeta[left]) * betas[right, , drop = FALSE])/(sbeta[right] -
sbeta[left])
newbetas[left == right, ] <- betas[left[left == right], ]
robject <- switch(type, coefficients = list(s = s, fraction = sfrac,
mode = mode, coefficients = drop(newbetas)), fit = list(s = s,
fraction = sfrac, mode = mode, fit = drop(scale(newx,
object$meanx, FALSE) %*% t(newbetas)) + object$mu))
robject
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment