Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created June 30, 2014 10:53
Show Gist options
  • Save explodecomputer/1608e4ff7bc7dbdf795a to your computer and use it in GitHub Desktop.
Save explodecomputer/1608e4ff7bc7dbdf795a to your computer and use it in GitHub Desktop.
m <- data.frame(matrix(rnorm(1000*20), 1000, 20))
y = rnorm(1000)
performScan <- function(y, dat)
{
n <- ncol(dat)
res <- array(0, n)
for(i in 1:n)
{
n[i] <- anova(lm(y ~ dat[,i]))$P[1]
}
return(n)
}
permutations <- function(y, dat, nperm)
{
res <- array(0, nperm)
for(i in 1:nperm)
{
cat(i, "\n")
# Randomly shuffle the y vector, but don't change dat
# This means that y1 is no longer related to dat
# but dat still has the same correlation structure
y1 <- sample(y, length(y), replace=FALSE)
# Perform the full analysis of y1 against all dat variables
p <- performScan(y1, dat)
# Just keep the smallest p value -
# this p value should have no biological relevance because y is not related to dat
# But it will give some indication as to how much correlation structure there is in dat
res[i] <- min(p)
}
# Sort the saved p values
res <- sort(res, decreasing=FALSE)
# The 0.05 family wise error rate is the 5%th most extreme pvalue
threshold <- res[round(0.05*nperm)]
return(list(pvals=res, threshold=threshold))
}
perms <- permutations(y, dat, 100)
# Distribution of p values that you would expect by chance if the null hypotheses are true
hist(-log10(perms$pvals))
# this is the threshold
perms$threshold
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment