Last active
December 20, 2015 10:59
-
-
Save nassimhaddad/6119745 to your computer and use it in GitHub Desktop.
an extended version of FactoMineR's MCA()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("devtools") | |
# library(devtools) | |
# source_gist(6119745) | |
#' Example: | |
#' | |
#' data(tea) | |
#' newtea = tea[, c("Tea", "How", "how", "sugar", "where", "always")] | |
#' test <- MCA(newtea) | |
#' test <- add_modified_rates(test) | |
#' test$eig | |
#' | |
add_modified_rates <- function(mca1){ | |
Q <- length(mca1$call$quali) | |
# compute modified inertia rates | |
lambda_m <- mean(mca1$eig$eigenvalue) | |
lambda_pseudo <- (mca1$eig$eigenvalue-lambda_m)^2*(Q/(Q-1))^2 | |
pos_lambda <- mca1$eig$eigenvalue>lambda_m | |
S <- sum(lambda_pseudo[pos_lambda]) | |
mca1$eig$modified_rate <- lambda_pseudo*pos_lambda/S*100 | |
mca1$eig$cum_modified_rate <- cumsum(mca1$eig$modified_rate) | |
return(mca1) | |
} | |
#' MCA_extended | |
#' | |
#' mca of package FactoMineR | |
#' extended with: | |
#' * the modified inertia rates (Benzecri) | |
#' * the contribution of the variable to the cloud | |
#' * the weight of the variable | |
#' | |
#' Example: | |
#' | |
#' data(tea) | |
#' newtea = tea[, c("Tea", "How", "how", "sugar", "where", "always")] | |
#' test <- MCA_extended(newtea) | |
#' test$eig | |
#' test$var$additional | |
#' | |
MCA_extended <- function(X, ncp = 5, ...){ | |
require(FactoMineR) | |
mca1 = MCA(X = X, ncp = ncp, ...) | |
# number of active questions. | |
# note that this needs to be modified to work with supplementary variables | |
Q <- ncol(X) | |
# compute modified inertia rates | |
lambda_m <- mean(mca1$eig$eigenvalue) | |
lambda_pseudo <- (mca1$eig$eigenvalue-lambda_m)^2*(Q/(Q-1))^2 | |
pos_lambda <- mca1$eig$eigenvalue>lambda_m | |
S <- sum(lambda_pseudo[pos_lambda]) | |
mca1$eig$modified_rate <- lambda_pseudo*pos_lambda/S*100 | |
mca1$eig$cum_modified_rate <- cumsum(mca1$eig$modified_rate) | |
# compute contribution to cloud | |
get_freq <- function(df){ | |
df_table <- do.call(rbind, lapply(df, function(x)as.data.frame(table(x, useNA = "ifany")))) | |
df_table <- df_table[!(df_table$Freq == 0),] | |
out <- df_table$Freq/nrow(df) | |
names(out) <- df_table$x | |
return(out) | |
} | |
# frequency by modality | |
f_k <- get_freq(X) | |
K <- length(f_k) | |
# weight and contribution rel to coud | |
mca1$var$additional <- data.frame(Ctr = (1-f_k)/(K-Q), Weight = f_k / Q) | |
row.names(mca1$var$additional) <- names(f_k) | |
# names(f_k)[duplicated(names(f_k))] | |
# df[, "ct:leaveChildToNeigh"] | |
return(mca1) | |
} | |
get_mca_var_table <- function(MCA_res){ | |
out_df <- data.frame(Weight = MCA_res$var$additional$Weight, | |
Coord = MCA_res$var$coord, | |
Ctr = MCA_res$var$additional$Ctr, | |
Ctr= MCA_res$var$contrib) | |
rowNames <- row.names(MCA_res$var$coord) | |
new_names <- as.data.frame(do.call(rbind, strsplit(rowNames, ":|_"))) | |
colnames(new_names) <- c("theme", "question", "modality") | |
out_df <- cbind(new_names, out_df) | |
return(out_df) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment