Created
November 11, 2015 09:24
-
-
Save gurix/10c675632798da04d9d2 to your computer and use it in GitHub Desktop.
Monkey patch alpha function in psych package to suppress the direct output for items were negatively correlated with the total scale
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"alpha" <- function(x,keys=NULL,cumulative=FALSE,title=NULL,max=10,na.rm=TRUE,check.keys=FALSE,n.iter=1,delete=TRUE,use="pairwise") { #find coefficient alpha given a data frame or a matrix | |
alpha.1 <- function(C,R) { | |
n <- dim(C)[2] | |
alpha.raw <- (1- tr(C)/sum(C))*(n/(n-1)) | |
alpha.std <- (1- n/sum(R))*(n/(n-1)) | |
smc.R <- smc(R) | |
G6 <- (1- (n-sum(smc.R))/sum(R)) | |
av.r <- (sum(R)-n)/(n*(n-1)) | |
sn <- n*av.r/(1-av.r) | |
Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2))) | |
result <- list(raw=alpha.raw,std=alpha.std,G6=G6,av.r=av.r,sn=sn,Q=Q) | |
return(result) | |
} | |
#begin main function | |
cl <- match.call() | |
if(!is.matrix(x) && !is.data.frame(x)) stop('Data must either be a data frame or a matrix') | |
nvar <- dim(x)[2] | |
nsub <- dim(x)[1] | |
scores <- NULL | |
response.freq <- NULL | |
if (nsub !=nvar) { | |
item.var <- apply(x,2,sd,na.rm=na.rm) | |
bad <- which((item.var <= 0)|is.na(item.var)) | |
if((length(bad) > 0) && delete) { | |
for (baddy in 1:length(bad)) {warning( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} | |
x <- x[,-bad] | |
nvar <- nvar - length(bad) | |
} | |
response.freq <- response.frequencies(x,max=max) | |
C <- cov(x,use=use)} else {C <- x} | |
p1 <- principal(x) | |
if(any(p1$loadings < 0)) { | |
if (check.keys) { | |
warning("Some items were negatively correlated with total scale and were automatically reversed.\n This is indicated by a negative sign for the variable name.") | |
keys <- 1- 2* (p1$loadings < 0 ) | |
} else { | |
if(is.null(keys)) { | |
warning(paste("Some items (",rownames(p1$loadings)[(p1$loadings < 0)],") were negatively correlated with the total scale and probably should be reversed. To do this, run the function again with the 'check.keys=TRUE' option")) | |
} | |
} | |
} | |
if(is.null(keys)) {keys <- rep(1,nvar)} else { | |
keys<- as.vector(keys) | |
if(length(keys) < nvar) { | |
temp <- keys # this is the option of keying just the reversals | |
keys <- rep(1,nvar) | |
names(keys) <- colnames(x) | |
keys[temp] <- -1 | |
} | |
} | |
key.d <- diag(keys) | |
C <- key.d %*% C %*% key.d | |
signkey <- strtrim(keys,1) | |
signkey[signkey=="1"] <- "" | |
colnames(x) <- paste(colnames(x),signkey,sep="") | |
if (nsub !=nvar) { #raw data | |
if(any(keys < 0 )) { | |
min.item <- min(x,na.rm=na.rm) | |
max.item <- max(x,na.rm=na.rm) | |
adjust <- max.item + min.item | |
flip.these <- which(keys < 0 ) | |
x[,flip.these] <- adjust - x[,flip.these] | |
} | |
if(cumulative) { total <- rowSums(x,na.rm=na.rm) } else { total <- rowMeans(x,na.rm=na.rm) } | |
mean.t <- mean(total,na.rm=na.rm) | |
sdev <- sd(total,na.rm=na.rm) | |
raw.r <- cor(total,x,use=use) | |
t.valid <- colSums(!is.na(x)) | |
} else { #we are working with a correlation matrix | |
total <- NULL | |
totals <- TRUE | |
} | |
R <- cov2cor(C) | |
drop.item <- vector("list",nvar) | |
alpha.total <- alpha.1(C,R) | |
if(nvar > 2) { | |
for (i in 1:nvar) { | |
drop.item[[i]] <- alpha.1(C[-i,-i,drop=FALSE],R[-i,-i,drop=FALSE]) | |
} | |
} else { | |
drop.item[[1]] <- drop.item[[2]] <- c(rep(R[1,2],2),smc(R)[1],R[1,2],NA,NA)} | |
by.item <- data.frame(matrix(unlist(drop.item),ncol=6,byrow=TRUE)) | |
if(nsub > nvar) { | |
by.item[6] <- sqrt(by.item[6]/nsub) | |
colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","alpha se") | |
} else { | |
by.item <- by.item[-6] | |
colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N") | |
} | |
rownames(by.item) <- colnames(x) | |
Vt <- sum(R) | |
item.r <- colSums(R)/sqrt(Vt) #this is standardized r | |
#correct for item overlap by using smc | |
RC <-R | |
diag(RC) <-smc(R) | |
Vtc <- sum(RC) | |
item.rc <-colSums(RC)/sqrt(Vtc) | |
#yet one more way to correct is to correlate item with rest of scale | |
if(nvar > 1) { | |
r.drop <- rep(0,nvar) | |
for (i in 1:nvar) { v.drop <- sum(C[-i,-i,drop=FALSE]) | |
c.drop <- sum(C[,i]) - C[i,i] | |
r.drop[i] <- c.drop/sqrt(C[i,i]*v.drop) | |
} | |
} | |
item.means <- colMeans(x, na.rm=na.rm ) | |
item.sd <- apply(x,2,sd,na.rm=na.rm) | |
if(nsub > nvar) { | |
ase = sqrt(alpha.total$Q/nsub) | |
alpha.total <- data.frame(alpha.total[1:5],ase=ase,mean=mean.t,sd=sdev) | |
colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","ase","mean","sd") | |
rownames(alpha.total) <- "" | |
stats <- data.frame(n=t.valid,raw.r=t(raw.r),std.r =item.r,r.cor = item.rc,r.drop = r.drop,mean=item.means,sd=item.sd) | |
} else { | |
alpha.total <- data.frame(alpha.total[-6]) #fixed 27/7/14 | |
colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)" ,"average_r","S/N") | |
rownames(alpha.total) <- "" | |
stats <- data.frame(r =item.r,r.cor = item.rc,r.drop = r.drop) #added r.drop 10/12/13 | |
} | |
rownames(stats) <- colnames(x) | |
if(n.iter > 1) { | |
# do a bootstrap confidence interval for alpha | |
# if(!require(parallel)) {message("The parallel package needs to be installed to run mclapply")} | |
if(nsub == nvar) { | |
message("bootstrapped confidence intervals require raw data") | |
boot <- NULL | |
boot.ci <- NULL | |
} else { | |
boot <- vector("list",n.iter) | |
boot <- mclapply(1:n.iter,function(XX) { | |
xi <- x[sample.int(nsub,replace=TRUE),] | |
C <- cov(xi,use="pairwise") | |
if(!is.null(keys)) { | |
key.d <- diag(keys) | |
xi <- key.d %*% C %*% key.d | |
} | |
R <- cov2cor(C) | |
alpha.1(C,R) | |
}) #end of mclapply | |
boot <- matrix(unlist(boot),ncol=6,byrow=TRUE) | |
colnames(boot) <- c("raw_alpha","std.alpha","G6(smc)","average_r","s/n","ase") | |
boot.ci <- quantile(boot[,1],c(.025,.5,.975)) | |
} | |
} else { | |
boot=NULL | |
boot.ci <- NULL | |
} | |
result <- list(total=alpha.total,alpha.drop=by.item,item.stats=stats,response.freq=response.freq,keys=keys,scores = total,nvar=nvar,boot.ci=boot.ci,boot=boot,call=cl,title=title) | |
class(result) <- c("psych","alpha") | |
return(result) | |
} | |
#modified Sept 8, 2010 to add r.drop feature | |
#modified October 12, 2011 to add apply to the sd function | |
#modified November 2, 2010 to use sd instead of SD | |
#January 30, 2011 - added the max category parameter (max) | |
#June 20, 2011 -- revised to add the check.keys option as suggested by Jeremy Miles | |
#Oct 3, 2013 check for variables with no variance and drop them with a warning | |
#November 22, 2013 Added the standard error as suggested by | |
#modified December 6, 2013 to add empirical confidence estimates | |
#modified January 9, 2014 to add multicore capabilities to the bootstrap | |
#corrected December 18 to allow reverse keying for correlation matrices as well as raw data | |
#modified 1/16/14 to add S/N to summary stats | |
#added item.c (raw correlation) 1/10/15 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment