Created
November 28, 2013 08:46
-
-
Save menugget/7688922 to your computer and use it in GitHub Desktop.
BVSTEP (Clarke and Ainsworth 1992). Requires package "vegan".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bv.step <- function(fix.mat, var.mat, | |
fix.dist.method="bray", var.dist.method="euclidean", | |
scale.fix=FALSE, scale.var=TRUE, | |
max.rho=0.95, | |
min.delta.rho=0.001, | |
random.selection=TRUE, | |
prop.selected.var=0.2, | |
num.restarts=10, | |
var.always.include=NULL, | |
var.exclude=NULL, | |
output.best=10 | |
){ | |
if(dim(fix.mat)[1] != dim(var.mat)[1]){stop("fixed and variable matrices must have the same number of rows")} | |
if(sum(var.always.include %in% var.exclude) > 0){stop("var.always.include and var.exclude share a variable")} | |
require(vegan) | |
if(scale.fix){fix.mat<-scale(fix.mat)}else{fix.mat<-fix.mat} | |
if(scale.var){var.mat<-scale(var.mat)}else{var.mat<-var.mat} | |
fix.dist <- vegdist(as.matrix(fix.mat), method=fix.dist.method) | |
#an initial removal phase | |
var.dist.full <- vegdist(as.matrix(var.mat), method=var.dist.method) | |
full.cor <- suppressWarnings(cor.test(fix.dist, var.dist.full, method="spearman"))$estimate | |
var.comb <- combn(1:ncol(var.mat), ncol(var.mat)-1) | |
RES <- data.frame(var.excl=rep(NA,ncol(var.comb)), n.var=ncol(var.mat)-1, rho=NA) | |
for(i in 1:dim(var.comb)[2]){ | |
var.dist <- vegdist(as.matrix(var.mat[,var.comb[,i]]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES$var.excl[i] <- c(1:ncol(var.mat))[-var.comb[,i]] | |
RES$rho[i] <- temp$estimate | |
} | |
delta.rho <- RES$rho - full.cor | |
exclude <- sort(unique(c(RES$var.excl[which(abs(delta.rho) < min.delta.rho)], var.exclude))) | |
if(random.selection){ | |
num.restarts=num.restarts | |
prop.selected.var=prop.selected.var | |
prob<-rep(1,ncol(var.mat)) | |
if(prop.selected.var< 1){ | |
prob[exclude]<-0 | |
} | |
n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
} else { | |
num.restarts=1 | |
prop.selected.var=1 | |
prob<-rep(1,ncol(var.mat)) | |
n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
} | |
RES_TOT <- c() | |
for(i in 1:num.restarts){ | |
step=1 | |
RES <- data.frame(step=step, step.dir="F", var.incl=NA, n.var=0, rho=0) | |
attr(RES$step.dir, "levels") <- c("F","B") | |
best.comb <- which.max(RES$rho) | |
best.rho <- RES$rho[best.comb] | |
delta.rho <- Inf | |
selected.var <- sort(unique(c(sample(1:dim(var.mat)[2], n.selected.var, prob=prob), var.always.include))) | |
while(best.rho < max.rho & delta.rho > min.delta.rho & RES$n.var[best.comb] < length(selected.var)){ | |
#forward step | |
step.dir="F" | |
step=step+1 | |
var.comb <- combn(selected.var, RES$n.var[best.comb]+1, simplify=FALSE) | |
if(RES$n.var[best.comb] == 0){ | |
var.comb.incl<-1:length(var.comb) | |
} else { | |
var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
temp <- NA*1:length(var.comb) | |
for(j in 1:length(temp)){ | |
temp[j] <- all(var.keep %in% var.comb[[j]]) | |
} | |
var.comb.incl <- which(temp==1) | |
} | |
RES.f <- data.frame(step=rep(step, length(var.comb.incl)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]+1, rho=NA) | |
for(f in 1:length(var.comb.incl)){ | |
var.incl <- var.comb[[var.comb.incl[f]]] | |
var.incl <- var.incl[order(var.incl)] | |
var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES.f$var.incl[f] <- paste(var.incl, collapse=",") | |
RES.f$rho[f] <- temp$estimate | |
} | |
last.F <- max(which(RES$step.dir=="F")) | |
RES <- rbind(RES, RES.f[which.max(RES.f$rho),]) | |
best.comb <- which.max(RES$rho) | |
delta.rho <- RES$rho[best.comb] - best.rho | |
best.rho <- RES$rho[best.comb] | |
if(best.comb == step){ | |
while(best.comb == step & RES$n.var[best.comb] > 1){ | |
#backward step | |
step.dir="B" | |
step <- step+1 | |
var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
var.comb <- combn(var.keep, RES$n.var[best.comb]-1, simplify=FALSE) | |
RES.b <- data.frame(step=rep(step, length(var.comb)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]-1, rho=NA) | |
for(b in 1:length(var.comb)){ | |
var.incl <- var.comb[[b]] | |
var.incl <- var.incl[order(var.incl)] | |
var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES.b$var.incl[b] <- paste(var.incl, collapse=",") | |
RES.b$rho[b] <- temp$estimate | |
} | |
RES <- rbind(RES, RES.b[which.max(RES.b$rho),]) | |
best.comb <- which.max(RES$rho) | |
best.rho<- RES$rho[best.comb] | |
} | |
} else { | |
break() | |
} | |
} | |
RES_TOT <- rbind(RES_TOT, RES[2:dim(RES)[1],]) | |
print(paste(round((i/num.restarts)*100,3), "% finished")) | |
} | |
RES_TOT <- unique(RES_TOT[,3:5]) | |
if(dim(RES_TOT)[1] > output.best){ | |
order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE)[1:output.best],] | |
} else { | |
order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE), ] | |
} | |
rownames(order.by.best)<-NULL | |
order.by.i.comb <- c() | |
for(i in 1:length(selected.var)){ | |
f1 <- which(RES_TOT$n.var==i) | |
f2 <- which.max(RES_TOT$rho[f1]) | |
order.by.i.comb <- rbind(order.by.i.comb, RES_TOT[f1[f2],]) | |
} | |
rownames(order.by.i.comb)<-NULL | |
if(length(exclude)<1){var.exclude=NULL} else {var.exclude=exclude} | |
out <- list( | |
order.by.best=order.by.best, | |
order.by.i.comb=order.by.i.comb, | |
best.model.vars=paste(colnames(var.mat)[as.numeric(unlist(strsplit(order.by.best$var.incl[1], ",")))], collapse=","), | |
best.model.rho=order.by.best$rho[1], | |
var.always.include=var.always.include, | |
var.exclude=var.exclude | |
) | |
out | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment