Last active
May 20, 2024 17:33
-
-
Save timflutre/43daacf2c8868f609489 to your computer and use it in GitHub Desktop.
compare lme4 and rrBLUP to fit an "animal model" (LMM) in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Model: a specific kind of linear mixed model known as "animal model" by geneticists | |
## y = mu 1_N + X b + Z u + e = W a + Z u + e | |
## y is N x 1; X is N x P; Z is N x Q; W is N x (P+1) | |
## u ~ Norm_Q(0, sigma_u^2 A); e ~ Norm_N(0, sigma^2 I_N) | |
## Goal of this document: estimate the variance components sigma_u^2 and sigma^2 | |
## 1) simulate some data | |
## 2) fit the model above using the package rrBLUP (v4.3, on CRAN) | |
## 3) fit the model above using lme4 (v1.7, on CRAN) | |
## Author: Timothée Flutre (INRA) | |
## ----pkg----------------------------------------------------------------- | |
seed <- 1859; set.seed(seed) | |
suppressPackageStartupMessages(library(MASS)) | |
suppressPackageStartupMessages(library(Matrix)) | |
suppressPackageStartupMessages(library(rrBLUP)) | |
suppressPackageStartupMessages(library(lme4)) | |
## ----simul_animals------------------------------------------------------- | |
N <- 1000 # chosen so that one has enough power | |
animal.ids <- sprintf(fmt=paste0("ind%0", floor(log10(N))+1, "i"), 1:N) | |
head(animal.ids) | |
## ----simul_global_mean--------------------------------------------------- | |
mu <- 4 # arbitrary | |
## ----simul_other_fixed_effects------------------------------------------- | |
P <- 1 | |
X <- matrix(data=rnorm(n=N), nrow=N, ncol=P) | |
b <- matrix(data=c(2), nrow=P, ncol=1) # arbitrary | |
## ----simul_all_fixed_effects--------------------------------------------- | |
W <- cbind(rep(1, N), X) | |
a <- matrix(c(mu, b)) | |
head(W) | |
head(a) | |
## ----simul_A------------------------------------------------------------- | |
Q <- N | |
nb.factors <- 5 | |
nb.snps <- 1000 | |
snp.ids <- sprintf(fmt=paste0("snp%0", floor(log10(nb.snps))+1, "i"), | |
1:nb.snps) | |
F <- matrix(data=rnorm(n=Q*nb.factors), nrow=Q, ncol=nb.factors) | |
M <- mvrnorm(n=nb.snps, mu=rep(1, Q), Sigma=F %*% t(F) + diag(Q)) | |
M[M < 0.2] <- 0 | |
M[M >= 0.2 & M <= 1.8] <- 1 | |
M[M > 1.8] <- 2 | |
A <- (1/nb.snps) * t(M) %*% M | |
print(A[1:5,1:5]) | |
kappa(A) | |
image(A[,nrow(A):1], axes=FALSE) | |
## ----simul_random_effects------------------------------------------------ | |
Z <- diag(Q) | |
sigmau2 <- 5 # arbitrary | |
G <- as.matrix(nearPD(sigmau2 * A)$mat) | |
u <- matrix(mvrnorm(n=1, mu=rep(0, Q), Sigma=G)) | |
summary(u) | |
## ----simul_errors-------------------------------------------------------- | |
lambda <- 3 # interesting to change it and see how inference goes | |
sigma2 <- sigmau2 / lambda | |
R <- sigma2 * diag(N) | |
e <- matrix(mvrnorm(n=1, mu=rep(0, N), Sigma=R)) | |
## ----h2------------------------------------------------------------------ | |
(h2 <- sigmau2 / (sigmau2 + sigma2)) | |
## ----simul_phenotypes---------------------------------------------------- | |
y <- W %*% a + Z %*% u + e | |
## ----simul_reformat------------------------------------------------------ | |
dat <- data.frame(fix=W[,2], | |
animal=factor(animal.ids), | |
response=y[,1]) | |
str(dat) | |
summary(dat) | |
## ----fit_rrBLUP---------------------------------------------------------- | |
system.time(fit.rrBLUP <- mixed.solve(y=y, | |
Z=Z, | |
K=A, | |
X=W, | |
method="REML", | |
SE=TRUE, | |
return.Hinv=TRUE)) | |
## time: user=3.209 sys=0.004 elapsed=3.214 | |
fit.rrBLUP$Vu # 5.008 | |
sigmau2 # 5 | |
fit.rrBLUP$Ve # 1.665 | |
sigma2 # 1.667 | |
## very good! | |
## ----fit_lme4------------------------------------------------------------ | |
formula <- response ~ 1 + fix + (1|animal) | |
parsedFormula <- lFormula(formula=formula, data=dat, | |
control=lmerControl(check.nobs.vs.nlev="ignore", | |
check.nobs.vs.nRE="ignore")) | |
relmat <- list(animal=A) | |
relfac <- relmat | |
flist <- parsedFormula$reTrms[["flist"]] # list of grouping factors | |
Ztlist <- parsedFormula$reTrms[["Ztlist"]] # list of transpose of the sparse model matrices | |
stopifnot(all(names(relmat) %in% names(flist))) | |
asgn <- attr(flist, "assign") | |
for(i in seq_along(relmat)) { | |
tn <- which(match(names(relmat)[i], names(flist)) == asgn) | |
if(length(tn) > 1) | |
stop("a relationship matrix must be associated", | |
" with only one random effects term", call.=FALSE) | |
relmat[[i]] <- Matrix(relmat[[i]], sparse=TRUE) | |
relfac[[i]] <- chol(relmat[[i]]) | |
Ztlist[[i]] <- relfac[[i]] %*% Ztlist[[i]] | |
} | |
parsedFormula$reTrms[["Ztlist"]] <- Ztlist | |
parsedFormula$reTrms[["Zt"]] <- do.call(rBind, Ztlist) | |
devianceFunction <- do.call(mkLmerDevfun, parsedFormula) | |
optimizerOutput <- optimizeLmer(devianceFunction) | |
fit.lme4 <- mkMerMod(rho=environment(devianceFunction), | |
opt=optimizerOutput, | |
reTrms=parsedFormula$reTrms, | |
fr=parsedFormula$fr) | |
summary(fit.lme4) | |
vc <- as.data.frame(VarCorr(fit.lme4)) | |
(sigma2.hat <- vc[vc$grp == "Residual", "vcov"]) # 1.666 | |
sigma2 # 1.667 | |
(sigmau2.hat <- vc[vc$grp == "animal", "vcov"]) # 5.008 | |
sigmau2 # 5 | |
## very good, same as rrBLUP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ok, thanks for checking, but I don't have any explanation for that. For a given run on a given machine, as long as all the tools agree, that's the most important.