emhart · August 29, 2014 21:16
diff --git a/LarsCode.R b/LarsCode.R
 library(MASS)
 library(lars)
 library(lavaan)

 ####Code to wrap lasso and extra CV...
 ### Taken from: http://www4.stat.ncsu.edu/~boos/var.select/lasso.dr1.txt

 lasso.dr1<-function(x,y,kfold=5,rep=10,plot=T,grid=seq(from=0,to=1,length=100)){
  # wrapper for lasso in lars, with modifications to cv.lars
  require(lars)
  # set.seed(seed)
  ok<-complete.cases(x,y)
  x<-x[ok,]                            # get rid of na's
  y<-y[ok]                             # since regsubsets can't handle na's
  m<-ncol(x)
  n<-nrow(x)
  as.matrix(x)->x
  cvout<-cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)
  sumcv<-cvout$cv
  frac<-cvout$fraction
  if(rep >1){
    for(i in 1:(rep-1)){
      sumcv<-sumcv+cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)$cv
    }   # ends for
  }   # ends if
  sumcv<-sumcv/rep
  if(plot) plot(frac,sumcv)
  sfrac<-frac[which.min(sumcv)]       # assume frac stays the same
  object<-lars(x,y,type="lasso")
  fit <- predict.lars(object,x,s=sfrac,type="fit",mode="fraction")$fit
  coeff<-predict.lars(object,x,s=sfrac,type="coef",mode="fraction")$coefficients
  st<-sum(coeff !=0)                                          # number nonzero
  mse<-sum((y-fit)^2)/(n-st-1)
  colnames(x)<-colnames(x,do.NULL=F,prefix="")      # corrects for no colnames
  x<-x[,colnames(x)[which(coeff !=0)]]
  cat("",fill=T)
  cat("K (num. of folds)=",kfold,fill=T)
  cat("Num. of reps)=",rep,fill=T)
  cat("Fraction from CV=",sfrac,fill=T)
  cat("number nonzero=",st,fill=T)
  cat("mse=",mse,fill=T)
  cat("Variables Selected =",colnames(x),fill=T)
  return(list(l.object=object,frac=frac,fit=fit,coeff=coeff,st=st,mse=mse,sumcv=sumcv))
 }

 ### taken from: http://www4.stat.ncsu.edu/~boos/var.select/cv.lars1.txt

 cv.lars1<- function (x, y, K = 10, fraction = seq(from = 0, to = 1, length = 100), trace = FALSE, plot.it = TRUE, se = TRUE) {
  # Howard Bondell modification of cv.lars
  ### add this code
  
  full.fit <- lars(x, y, trace = trace)
  betas <- full.fit$beta
  sbetas <- scale(betas, FALSE, 1/full.fit$normx)
  kp <- dim(betas)
  k <- kp[1]
  p <- kp[2]
  norm.bound <- fraction*(abs(sbetas[k,]) %*% rep(1, p))
  
  ###
  
  
  all.folds <- cv.folds(length(y), K)
  residmat <- matrix(0, length(fraction), K)
  for (i in seq(K)) {
    omit <- all.folds[[i]]
    fit <- lars(x[-omit, ], y[-omit], trace = trace)
    
    ### fit <- predict(fit, x[omit, , drop = FALSE], mode = "fraction", 
    ###    s = fraction)$fit
    ### Replace this line with
    
    fit <- predict.lars1(fit, x[omit, , drop = FALSE], mode = "norm", 
                         s = norm.bound)$fit
    ###
    
    if (length(omit) == 1) 
      fit <- matrix(fit, nrow = 1)
    residmat[, i] <- apply((y[omit] - fit)^2, 2, mean)
    if (trace) 
      cat("\n CV Fold", i, "\n\n")
  }
  cv <- apply(residmat, 1, mean)
  cv.error <- sqrt(apply(residmat, 1, var)/K)
  object <- list(fraction = fraction, cv = cv, cv.error = cv.error)
  if (plot.it) 
    plotCVLars(object, se = se)
  invisible(object)
 }


 predict.lars1 <- function (object, newx, s, type = c("fit", "coefficients"), mode = c("step", "fraction", "norm")) {
  mode <- match.arg(mode)
  type <- match.arg(type)
  if (missing(newx) & type == "fit") {
    warning("Type=fit with no newx argument; type switched to coefficients")
    type <- "coefficients"
  }
  betas <- object$beta
  sbetas <- scale(betas, FALSE, 1/object$normx)
  kp <- dim(betas)
  k <- kp[1]
  p <- kp[2]
  steps <- seq(k)
  if (missing(s)) {
    s <- steps
    mode <- "step"
  }
  sbeta <- switch(mode, step = {
    if (any(s < 0) | any(s > k)) 
      stop("Argument s out of range")
    steps
  }, fraction = {
    if (any(s > 1) | any(s < 0)) 
      stop("Argument s out of range")
    nbeta <- drop(abs(sbetas) %*% rep(1, p))
    nbeta/nbeta[k]
    
    ### This part is changed
    
  }, norm = {
    nbeta <- drop(abs(sbetas) %*% rep(1, p))
    if (any(s < 0)) 
      stop("Argument s out of range")
    nbeta
  })
  
  s[s>nbeta[k]] <- nbeta[k]
  
  ###
  
  sfrac <- (s - sbeta[1])/(sbeta[k] - sbeta[1])
  sbeta <- (sbeta - sbeta[1])/(sbeta[k] - sbeta[1])
  usbeta <- unique(sbeta)
  useq <- match(usbeta, sbeta)
  sbeta <- sbeta[useq]
  betas <- betas[useq, ]
  coord <- approx(sbeta, seq(sbeta), sfrac)$y
  left <- floor(coord)
  right <- ceiling(coord)
  newbetas <- ((sbeta[right] - sfrac) * betas[left, , drop = FALSE] + 
                 (sfrac - sbeta[left]) * betas[right, , drop = FALSE])/(sbeta[right] - 
                                                                          sbeta[left])
  newbetas[left == right, ] <- betas[left[left == right], ]
  robject <- switch(type, coefficients = list(s = s, fraction = sfrac, 
                                              mode = mode, coefficients = drop(newbetas)), fit = list(s = s, 
                                                                                                      fraction = sfrac, mode = mode, fit = drop(scale(newx, 
                                                                                                                                                      object$meanx, FALSE) %*% t(newbetas)) + object$mu))
  robject
 }
	library(MASS)
	library(lars)
	library(lavaan)

	####Code to wrap lasso and extra CV...
	### Taken from: http://www4.stat.ncsu.edu/~boos/var.select/lasso.dr1.txt

	lasso.dr1<-function(x,y,kfold=5,rep=10,plot=T,grid=seq(from=0,to=1,length=100)){
	# wrapper for lasso in lars, with modifications to cv.lars
	require(lars)
	# set.seed(seed)
	ok<-complete.cases(x,y)
	x<-x[ok,] # get rid of na's
	y<-y[ok] # since regsubsets can't handle na's
	m<-ncol(x)
	n<-nrow(x)
	as.matrix(x)->x
	cvout<-cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)
	sumcv<-cvout$cv
	frac<-cvout$fraction
	if(rep >1){
	for(i in 1:(rep-1)){
	sumcv<-sumcv+cv.lars1(x,y,K=kfold,plot.it=F,fraction=grid)$cv
	} # ends for
	} # ends if
	sumcv<-sumcv/rep
	if(plot) plot(frac,sumcv)
	sfrac<-frac[which.min(sumcv)] # assume frac stays the same
	object<-lars(x,y,type="lasso")
	fit <- predict.lars(object,x,s=sfrac,type="fit",mode="fraction")$fit
	coeff<-predict.lars(object,x,s=sfrac,type="coef",mode="fraction")$coefficients
	st<-sum(coeff !=0) # number nonzero
	mse<-sum((y-fit)^2)/(n-st-1)
	colnames(x)<-colnames(x,do.NULL=F,prefix="") # corrects for no colnames
	x<-x[,colnames(x)[which(coeff !=0)]]
	cat("",fill=T)
	cat("K (num. of folds)=",kfold,fill=T)
	cat("Num. of reps)=",rep,fill=T)
	cat("Fraction from CV=",sfrac,fill=T)
	cat("number nonzero=",st,fill=T)
	cat("mse=",mse,fill=T)
	cat("Variables Selected =",colnames(x),fill=T)
	return(list(l.object=object,frac=frac,fit=fit,coeff=coeff,st=st,mse=mse,sumcv=sumcv))
	}

	### taken from: http://www4.stat.ncsu.edu/~boos/var.select/cv.lars1.txt

	cv.lars1<- function (x, y, K = 10, fraction = seq(from = 0, to = 1, length = 100), trace = FALSE, plot.it = TRUE, se = TRUE) {
	# Howard Bondell modification of cv.lars
	### add this code

	full.fit <- lars(x, y, trace = trace)
	betas <- full.fit$beta
	sbetas <- scale(betas, FALSE, 1/full.fit$normx)
	kp <- dim(betas)
	k <- kp[1]
	p <- kp[2]
	norm.bound <- fraction(abs(sbetas[k,]) %% rep(1, p))

	###


	all.folds <- cv.folds(length(y), K)
	residmat <- matrix(0, length(fraction), K)
	for (i in seq(K)) {
	omit <- all.folds[[i]]
	fit <- lars(x[-omit, ], y[-omit], trace = trace)

	### fit <- predict(fit, x[omit, , drop = FALSE], mode = "fraction",
	### s = fraction)$fit
	### Replace this line with

	fit <- predict.lars1(fit, x[omit, , drop = FALSE], mode = "norm",
	s = norm.bound)$fit
	###

	if (length(omit) == 1)
	fit <- matrix(fit, nrow = 1)
	residmat[, i] <- apply((y[omit] - fit)^2, 2, mean)
	if (trace)
	cat("\n CV Fold", i, "\n\n")
	}
	cv <- apply(residmat, 1, mean)
	cv.error <- sqrt(apply(residmat, 1, var)/K)
	object <- list(fraction = fraction, cv = cv, cv.error = cv.error)
	if (plot.it)
	plotCVLars(object, se = se)
	invisible(object)
	}


	predict.lars1 <- function (object, newx, s, type = c("fit", "coefficients"), mode = c("step", "fraction", "norm")) {
	mode <- match.arg(mode)
	type <- match.arg(type)
	if (missing(newx) & type == "fit") {
	warning("Type=fit with no newx argument; type switched to coefficients")
	type <- "coefficients"
	}
	betas <- object$beta
	sbetas <- scale(betas, FALSE, 1/object$normx)
	kp <- dim(betas)
	k <- kp[1]
	p <- kp[2]
	steps <- seq(k)
	if (missing(s)) {
	s <- steps
	mode <- "step"
	}
	sbeta <- switch(mode, step = {
	if (any(s < 0) \| any(s > k))
	stop("Argument s out of range")
	steps
	}, fraction = {
	if (any(s > 1) \| any(s < 0))
	stop("Argument s out of range")
	nbeta <- drop(abs(sbetas) %*% rep(1, p))
	nbeta/nbeta[k]

	### This part is changed

	}, norm = {
	nbeta <- drop(abs(sbetas) %*% rep(1, p))
	if (any(s < 0))
	stop("Argument s out of range")
	nbeta
	})

	s[s>nbeta[k]] <- nbeta[k]

	###

	sfrac <- (s - sbeta[1])/(sbeta[k] - sbeta[1])
	sbeta <- (sbeta - sbeta[1])/(sbeta[k] - sbeta[1])
	usbeta <- unique(sbeta)
	useq <- match(usbeta, sbeta)
	sbeta <- sbeta[useq]
	betas <- betas[useq, ]
	coord <- approx(sbeta, seq(sbeta), sfrac)$y
	left <- floor(coord)
	right <- ceiling(coord)
	newbetas <- ((sbeta[right] - sfrac) * betas[left, , drop = FALSE] +
	(sfrac - sbeta[left]) * betas[right, , drop = FALSE])/(sbeta[right] -
	sbeta[left])
	newbetas[left == right, ] <- betas[left[left == right], ]
	robject <- switch(type, coefficients = list(s = s, fraction = sfrac,
	mode = mode, coefficients = drop(newbetas)), fit = list(s = s,
	fraction = sfrac, mode = mode, fit = drop(scale(newx,
	object$meanx, FALSE) %*% t(newbetas)) + object$mu))
	robject
	}