Last active
July 12, 2020 16:49
-
-
Save wviechtb/700983ab0bde94bed7c645fce770f8e9 to your computer and use it in GitHub Desktop.
Function to compute var-cov matrix of correlations (raw or r-to-z transformed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rmat <- function(x, n, upper=TRUE, simplify=TRUE, rtoz=FALSE, data) { | |
if (inherits(x, "formula")) { | |
options(na.action = "na.pass") | |
if (missing(data)) | |
stop("Must specify 'data' argument when 'x' is a formula.") | |
if (!is.data.frame(data)) | |
data <- data.frame(data) | |
dat <- get_all_vars(x, data=data) | |
if (ncol(dat) != 4) | |
stop("Incorrect number of variables specified in formula.") | |
id <- dat[,4] | |
dat <- split(dat, id) | |
res <- list() | |
for (i in 1:length(dat)) { | |
ri <- dat[[i]][[1]] | |
var1 <- as.character(dat[[i]][[2]]) | |
var2 <- as.character(dat[[i]][[3]]) | |
vars <- sort(unique(c(var1, var2))) | |
R <- matrix(NA, nrow=length(vars), ncol=length(vars)) | |
diag(R) <- 1 | |
rownames(R) <- colnames(R) <- vars | |
for (j in 1:length(var1)) { | |
R[var1[j],var2[j]] <- R[var2[j],var1[j]] <- ri[j] | |
} | |
res[[i]] <- R | |
} | |
return(rmat(res, n=n, simplify=TRUE, rtoz=rtoz)) | |
} | |
### in case x is a list, need to loop through elements | |
if (is.list(x)) { | |
k <- length(x) | |
if (length(x) != length(n)) | |
stop("Argument 'n' must be of the same length as there are elements in 'x'.") | |
res <- list() | |
for (i in 1:k) { | |
res[[i]] <- rmat(x[[i]], n[i], upper=upper, rtoz=rtoz) | |
} | |
if (simplify) { | |
ki <- sapply(res, function(x) ifelse(is.null(x$dat), 0, nrow(x$dat))) | |
dat <- cbind(id=rep(1:k, times=ki), do.call(rbind, lapply(res, "[[", "dat"))) | |
V <- bldiag(lapply(res[ki > 0], "[[", "V")) | |
rownames(V) <- colnames(V) <- unlist(lapply(res, function(x) rownames(x$V))) | |
return(list(dat=dat, V=V)) | |
} else { | |
return(res) | |
} | |
} | |
### check if x is square matrix | |
if (!is.matrix(x)) | |
stop("Argument 'x' must be a matrix (or list thereof).") | |
if (dim(x)[1] != dim(x)[2]) | |
stop("Argument 'x' must be a square matrix (or list thereof).") | |
### set default dimension names | |
dimsx <- nrow(x) | |
dnames <- paste0("x", 1:dimsx) | |
### in case x has dimension names, use those | |
if (!is.null(rownames(x))) | |
dnames <- rownames(x) | |
if (!is.null(colnames(x))) | |
dnames <- colnames(x) | |
### in case x is a 1x1 (or 0x0) matrix, return nothing | |
if (dimsx <= 1L) | |
return(list(dat=NULL, V=NULL)) | |
### make x symmetric, depending on whether we use upper or lower part | |
if (upper) { | |
x[lower.tri(x)] <- t(x)[lower.tri(x)] | |
} else { | |
x[upper.tri(x)] <- t(x)[upper.tri(x)] | |
} | |
### check if x is symmetric (can be skipped since x must now be symmetric) | |
#if (!isSymmetric(x)) | |
# stop("x must be a symmetric matrix.") | |
### stack upper/lower triangular part of x into a column vector (this is always done column-wise!) | |
if (upper) { | |
ri <- cbind(x[upper.tri(x)]) | |
} else { | |
ri <- cbind(x[lower.tri(x)]) | |
} | |
### apply r-to-z transformation if requested | |
if (rtoz) | |
ri <- 1/2 * log((1 + ri)/(1 - ri)) | |
### I and J are matrices with 1:dimsx for rows and columns, respectively | |
I <- matrix(1:dimsx, nrow=dimsx, ncol=dimsx) | |
J <- matrix(1:dimsx, nrow=dimsx, ncol=dimsx, byrow=TRUE) | |
### get upper/lower triangular elements of I and J | |
if (upper) { | |
I <- I[upper.tri(I)] | |
J <- J[upper.tri(J)] | |
} else { | |
I <- I[lower.tri(I)] | |
J <- J[lower.tri(J)] | |
} | |
### dimensions in V (must be dimsx*(dimsx-1)/2) | |
dimsV <- length(ri) | |
### set up V matrix | |
V <- matrix(NA, nrow=dimsV, ncol=dimsV) | |
for (ro in 1:dimsV) { | |
for (co in 1:dimsV) { | |
i <- I[ro] | |
j <- J[ro] | |
k <- I[co] | |
l <- J[co] | |
### Olkin & Finn (1995), equation 5, page 157 | |
V[ro,co] <- 1/2 * x[i,j]*x[k,l] * (x[i,k]^2 + x[i,l]^2 + x[j,k]^2 + x[j,l]^2) + | |
x[i,k]*x[j,l] + x[i,l]*x[j,k] - | |
(x[i,j]*x[i,k]*x[i,l] + x[j,i]*x[j,k]*x[j,l] + x[k,i]*x[k,j]*x[k,l] + x[l,i]*x[l,j]*x[l,k]) | |
### Steiger (1980), equation 2, page 245 (provides the same result - checked) | |
#V[ro,co] <- 1/2 * ((x[i,k] - x[i,j]*x[j,k]) * (x[j,l] - x[j,k]*x[k,l]) + | |
# (x[i,l] - x[i,k]*x[k,l]) * (x[j,k] - x[j,i]*x[i,k]) + | |
# (x[i,k] - x[i,l]*x[l,k]) * (x[j,l] - x[j,i]*x[i,l]) + | |
# (x[i,l] - x[i,j]*x[j,l]) * (x[j,k] - x[j,l]*x[l,k])) | |
### Steiger (1980), equation 11, page 247 for r-to-z transformed values | |
if (rtoz) | |
V[ro,co] <- V[ro,co] / ((1 - x[i,j]^2) * (1 - x[k,l]^2)) | |
} | |
} | |
### divide V by (n-1) for raw correlations and by (n-3) for r-to-z transformed correlations | |
if (rtoz) { | |
V <- V/(n-3) | |
} else { | |
V <- V/(n-1) | |
} | |
### create matrix with var1 and var2 names and sort rowwise | |
dmat <- cbind(dnames[I], dnames[J]) | |
dmat <- t(apply(dmat, 1, sort)) | |
### set row/column names for V | |
var1var2 <- paste0(dmat[,1], ".", dmat[,2]) | |
rownames(V) <- colnames(V) <- var1var2 | |
return(list(dat=data.frame(yi=ri, var1=dmat[,1], var2=dmat[,2], var1var2=var1var2, stringsAsFactors=FALSE), V=V)) | |
} | |
if (F) { | |
### from Olkin & Finn (1990), Table 1, page 332 | |
R2 <- matrix(c( 1, .179, .396, .080, | |
.179, 1, .088, -.042, | |
.396, .088, 1, .719, | |
.080, -.042, .719, 1), nrow=4, ncol=4, byrow=TRUE) | |
rownames(R2) <- colnames(R2) <- c("BMI", "PUL", "SBP", "DPB") | |
### from Olkin & Finn (1990), Table 2, page 333 | |
R3 <- matrix(c( 1, NA, NA, NA, NA, NA, | |
.189, 1, NA, NA, NA, NA, | |
.462, -.103, 1, NA, NA, NA, | |
.024, .013, .305, 1, NA, NA, | |
.208, .143, .395, .149, 1, NA, | |
.023, .423, -.091, .098, .396, 1), nrow=6, ncol=6, byrow=TRUE) | |
rownames(R3) <- colnames(R3) <- c("C.BMI", "C.SBP", "S.BMI", "S.SBP", "M.BMI", "M.SBP") | |
res <- rmat(R2, 67) ### n=66, but formulas in article use 1/n, so set n=67 | |
res$dat <- res$dat[c(1,2,4),] | |
res$V <- round(res$V[c(1,2,4),c(1,2,4)] * 1000, 3) | |
res ### some slight discrepancies (probably due to correlations being rounded in Table 1) | |
res <- rmat(R3, 67, upper=FALSE) ### n=66, but formulas in article use 1/n, so set n=67 | |
res$dat <- res$dat[c(1,10,15),] | |
res$V <- round(res$V[c(1,10,15),c(1,10,15)] * 1000, 3) | |
res | |
### test that passing long format data via formula works | |
dat <- rmat(list(R2,R2), c(20,30))$dat | |
rmat(yi ~ var1 + var2 | id, n=c(20,30), data=dat) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment