Skip to content

Instantly share code, notes, and snippets.

@nassimhaddad
Created July 9, 2013 20:07
Show Gist options
  • Save nassimhaddad/5960799 to your computer and use it in GitHub Desktop.
Save nassimhaddad/5960799 to your computer and use it in GitHub Desktop.
MCA in R, including many important variables
# library() stops with an error if FactoMineR is not installed, whereas
# require() would only return FALSE and let the script fail later.
library(FactoMineR)
# load the `tea` data set
data(tea)
# keep only the columns used as active variables
newtea <- tea[, c("Tea", "How", "how", "sugar", "where", "always")]
# number of dimensions kept by the MCA and used for the eigenvalues below
n_dims <- 5
mca1 <- MCA(newtea, graph = FALSE, ncp = n_dims)
# coordinates of the categories of the variables along the dimensions
y_kl <- mca1$var$coord
# eigenvalues of the retained dimensions; selecting the column by name with
# `[` works whether mca1$eig is a data frame or a matrix (`$` fails on a
# matrix)
lambda_l <- mca1$eig[seq_len(n_dims), "eigenvalue"]
# Q = number of active variables
Q <- ncol(newtea)
# number of observed categories per variable; iterating over the columns
# avoids apply() coercing the data frame to a character matrix
cats <- vapply(newtea, function(x) nlevels(factor(x)), integer(1))
# K = total number of modalities
K <- sum(cats)
# get_freq(df): relative frequency of every modality in a data frame.
#
# Each column of `df` is tabulated with table(); the counts of all columns
# are concatenated in column order and divided by nrow(df).
# Returns a numeric vector named by modality label.
# Example: get_freq(newtea)
get_freq <- function(df) {
  counts_by_var <- lapply(df, table)
  counts <- unlist(lapply(counts_by_var, as.vector), use.names = FALSE)
  freq <- counts / nrow(df)
  names(freq) <- unlist(lapply(counts_by_var, names), use.names = FALSE)
  freq
}
# f_k: relative frequency of each modality
f_k <- get_freq(newtea)
# p_k: weight of each modality (frequency divided by the number of variables)
p_k <- f_k / Q
# ctr_k: contribution of each modality relative to the cloud
ctr_k <- (1 - f_k) / (K - Q)
# ctr_kl: contribution of each modality relative to each dimension.
# Multiplying the matrix by a vector recycles down the columns, so row k of
# the squared coordinates is scaled by p_k[k].
ctr_kl <- y_kl^2 * unname(p_k)
# Transposing puts the dimensions on the rows, so the same recycling divides
# each dimension by its eigenvalue; the second t() restores the layout.
ctr_kl <- t(t(ctr_kl) / unname(lambda_l))
# compare with the contributions reported by FactoMineR:
mca1$var$contrib
# conclusion: mca1$var$contrib = ctr_kl * 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment