Skip to content

Instantly share code, notes, and snippets.

@nassimhaddad
Last active December 20, 2015 10:59
Show Gist options
  • Save nassimhaddad/6119745 to your computer and use it in GitHub Desktop.
Save nassimhaddad/6119745 to your computer and use it in GitHub Desktop.
an extended version of FactoMineR's MCA()
# install.packages("devtools")
# library(devtools)
# source_gist(6119745)
#' Example:
#'
#' data(tea)
#' newtea = tea[, c("Tea", "How", "how", "sugar", "where", "always")]
#' test <- MCA(newtea)
#' test <- add_modified_rates(test)
#' test$eig
#'
add_modified_rates <- function(mca1){
Q <- length(mca1$call$quali)
# compute modified inertia rates
lambda_m <- mean(mca1$eig$eigenvalue)
lambda_pseudo <- (mca1$eig$eigenvalue-lambda_m)^2*(Q/(Q-1))^2
pos_lambda <- mca1$eig$eigenvalue>lambda_m
S <- sum(lambda_pseudo[pos_lambda])
mca1$eig$modified_rate <- lambda_pseudo*pos_lambda/S*100
mca1$eig$cum_modified_rate <- cumsum(mca1$eig$modified_rate)
return(mca1)
}
#' MCA_extended
#'
#' mca of package FactoMineR
#' extended with:
#' * the modified inertia rates (Benzecri)
#' * the contribution of the variable to the cloud
#' * the weight of the variable
#'
#' Example:
#'
#' data(tea)
#' newtea = tea[, c("Tea", "How", "how", "sugar", "where", "always")]
#' test <- MCA_extended(newtea)
#' test$eig
#' test$var$additional
#'
MCA_extended <- function(X, ncp = 5, ...){
require(FactoMineR)
mca1 = MCA(X = X, ncp = ncp, ...)
# number of active questions.
# note that this needs to be modified to work with supplementary variables
Q <- ncol(X)
# compute modified inertia rates
lambda_m <- mean(mca1$eig$eigenvalue)
lambda_pseudo <- (mca1$eig$eigenvalue-lambda_m)^2*(Q/(Q-1))^2
pos_lambda <- mca1$eig$eigenvalue>lambda_m
S <- sum(lambda_pseudo[pos_lambda])
mca1$eig$modified_rate <- lambda_pseudo*pos_lambda/S*100
mca1$eig$cum_modified_rate <- cumsum(mca1$eig$modified_rate)
# compute contribution to cloud
get_freq <- function(df){
df_table <- do.call(rbind, lapply(df, function(x)as.data.frame(table(x, useNA = "ifany"))))
df_table <- df_table[!(df_table$Freq == 0),]
out <- df_table$Freq/nrow(df)
names(out) <- df_table$x
return(out)
}
# frequency by modality
f_k <- get_freq(X)
K <- length(f_k)
# weight and contribution rel to coud
mca1$var$additional <- data.frame(Ctr = (1-f_k)/(K-Q), Weight = f_k / Q)
row.names(mca1$var$additional) <- names(f_k)
# names(f_k)[duplicated(names(f_k))]
# df[, "ct:leaveChildToNeigh"]
return(mca1)
}
get_mca_var_table <- function(MCA_res){
out_df <- data.frame(Weight = MCA_res$var$additional$Weight,
Coord = MCA_res$var$coord,
Ctr = MCA_res$var$additional$Ctr,
Ctr= MCA_res$var$contrib)
rowNames <- row.names(MCA_res$var$coord)
new_names <- as.data.frame(do.call(rbind, strsplit(rowNames, ":|_")))
colnames(new_names) <- c("theme", "question", "modality")
out_df <- cbind(new_names, out_df)
return(out_df)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment