Last active
February 15, 2017 18:27
-
-
Save tomsing1/abd9b06976138f4fe88978637ec116a9 to your computer and use it in GitHub Desktop.
R script demonstrating how spurious correlations between two variables can be introduced by a common normalization factor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create 3 independent random vectors of length 50.
# We use human / mouse gene symbols to illustrate that this
# reflects commonly used procedures in biomedical science,
# e.g. the analysis of qPCR results with a single
# reference gene.
# See MIQE guidelines on how to normalize properly:
# https://www.ncbi.nlm.nih.gov/pubmed/19246619
Il6 <- rnorm(n = 50, mean = 10)
Il1b <- rnorm(n = 50, mean = 10)
# The reference gene is drawn with a larger spread (sd = 3) than the two
# target genes, which use the default sd of rnorm().
Gapdh <- rnorm(n = 50, mean = 10, sd = 3)
# random numbers are not correlated with each other
# Panel function for pairs(): writes the correlation coefficient of x and y
# (as returned by cor()) and the p-value reported by cor.test() in the
# centre of the current panel.
#
# Arguments:
#   x, y    numeric vectors holding the two variables of the panel
#   digits  unused; kept so that existing callers passing it keep working
#   ...     ignored; absorbs any further arguments handed to the panel
# Returns the value of the final text() call; used for its side effect of
# drawing into the active panel.
panel.cor <- function(x, y, digits = 2, ...) {
  # Remember the panel's user coordinates and restore them on exit. The
  # parameter must be passed by name: par(old_usr) with a bare numeric
  # vector restores nothing and only raises a warning.
  old_usr <- par("usr")
  on.exit(par(usr = old_usr), add = TRUE)
  # Switch to unit-square coordinates so (0.5, 0.5) is the panel centre.
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y)
  p <- cor.test(x, y)$p.value
  txt <- sprintf("R = %.02f\np = %.02f", r, p)
  text(0.5, 0.5, txt, cex = 2)
}
# Scatter plot matrix of the raw vectors; the upper panels show the
# correlation coefficient and p-value via panel.cor.
pairs(cbind(Il6, Il1b, Gapdh), upper.panel = panel.cor)
# But when Il6 and Il1b are 'normalized' to Gapdh,
# strong correlations are induced
# Compute each ratio and the correlation test once and reuse them below.
Il6_norm <- Il6 / Gapdh
Il1b_norm <- Il1b / Gapdh
norm_test <- cor.test(Il6_norm, Il1b_norm)
# Explicit axis labels keep the text plot() would derive from the
# expressions Il6/Gapdh and Il1b/Gapdh.
plot(Il6_norm, Il1b_norm, xlab = "Il6/Gapdh", ylab = "Il1b/Gapdh")
title(main = sprintf(
  "R = %.02f p = %s",
  cor(Il6_norm, Il1b_norm),
  signif(norm_test$p.value)))
# Reference line with intercept 0 and slope 1.
abline(0, 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment