######## # # http://mikelove.wordpress.com/2009/09/14/measure-of-structure-in-residual-matrix/ # ####### k <- 15 p <- 40 n <- 100 var <- 10 contribution.max.var <- 10 cvar <- seq(from=contribution.max.var,to=1,length=k) components <- t(replicate(k,rnorm(p))) data <- t(replicate(n,colSums(components*matrix(rep(rnorm(k,0,cvar),times=p),ncol=p)))) data <- data + matrix(rnorm(p*n,0,var),ncol=p) s <- svd(data) residualMatrix <- function(s,data,cutoff) { if (cutoff > 1) { x <- s$u[,1:cutoff] %*% diag(s$d)[1:cutoff,1:cutoff] %*% t(s$v[,1:cutoff]) } else if (cutoff == 1) { x <- s$d[1] * s$u[,1] %*% t(s$v[,1]) } data-x } structureMeasure <- function(s,data,cutoff) { residual.matrix <- residualMatrix(s,data,cutoff) residual.s <- svd(residual.matrix,nu=1,nv=1) nc <- ncol(residual.matrix) nr <- nrow(residual.matrix) alt.norms <- rep(NA,10) for (i in 1:10) { altered.matrix <- residual.matrix*matrix(sample(c(-1,1),nc*nr,replace=T),ncol=nc) alt.s <- svd(altered.matrix,nu=1,nv=1) alt.norms[i] <- alt.s$d[1] } # 2-norm of residual matrix minus # the average 2-norm of altered residual matrix # divided by the frobenius norm of residual matrix (residual.s$d[1]-mean(alt.norms))/sqrt(sum(residual.matrix^2)) } system.time(y <- lapply(1:(p-1),function(i) try(structureMeasure(s,data,i)))) y2 <- unlist(y[sapply(y, function(x) !inherits(x, "try-error"))]) y2.ind <- which(sapply(y, function(x) !inherits(x, "try-error"))) par(mfrow = c(2,1),mar=c(3,3,2,2)) plot(s$d,main="singular values",ylab="",xlab="") abline(v=k) plot(y2.ind,log(y2),main="residual structure measure",ylab="",xlab="") abline(v=k) abline(v=y2.ind[which.min(y2)],col="red") legend("bottomright",lty=1,legend=c("minimum","true k"),col=c("red","black"),cex=.7)