########
#
# http://mikelove.wordpress.com/2009/09/14/measure-of-structure-in-residual-matrix/
#
#######

k <- 15
p <- 40
n <- 100
var <- 10
contribution.max.var <- 10
cvar <- seq(from=contribution.max.var,to=1,length=k)
components <- t(replicate(k,rnorm(p)))
data <- t(replicate(n,colSums(components*matrix(rep(rnorm(k,0,cvar),times=p),ncol=p))))
data <- data + matrix(rnorm(p*n,0,var),ncol=p)
s <- svd(data)

residualMatrix <- function(s,data,cutoff) {
  if (cutoff > 1) {
    x <- s$u[,1:cutoff] %*% diag(s$d)[1:cutoff,1:cutoff] %*% t(s$v[,1:cutoff])
  } else if (cutoff == 1) {
    x <- s$d[1] * s$u[,1] %*% t(s$v[,1])
  }
  data-x
}

structureMeasure <- function(s,data,cutoff) {
  residual.matrix <- residualMatrix(s,data,cutoff)
  residual.s <- svd(residual.matrix,nu=1,nv=1)
  nc <- ncol(residual.matrix)
  nr <- nrow(residual.matrix)
  alt.norms <- rep(NA,10)
  for (i in 1:10) {
    altered.matrix <- residual.matrix*matrix(sample(c(-1,1),nc*nr,replace=T),ncol=nc)
    alt.s <- svd(altered.matrix,nu=1,nv=1)
    alt.norms[i] <- alt.s$d[1]
  }
  # 2-norm of residual matrix minus
  # the average 2-norm of altered residual matrix
  # divided by the frobenius norm of residual matrix
  (residual.s$d[1]-mean(alt.norms))/sqrt(sum(residual.matrix^2))
}

system.time(y <- lapply(1:(p-1),function(i) try(structureMeasure(s,data,i))))
y2 <- unlist(y[sapply(y, function(x) !inherits(x, "try-error"))])
y2.ind <- which(sapply(y, function(x) !inherits(x, "try-error")))

par(mfrow = c(2,1),mar=c(3,3,2,2))
plot(s$d,main="singular values",ylab="",xlab="")
abline(v=k)
plot(y2.ind,log(y2),main="residual structure measure",ylab="",xlab="")
abline(v=k)
abline(v=y2.ind[which.min(y2)],col="red")
legend("bottomright",lty=1,legend=c("minimum","true k"),col=c("red","black"),cex=.7)