Created
September 18, 2012 22:39
-
-
Save dsparks/3746443 to your computer and use it in GitHub Desktop.
Categorical data to indicator matrix to log odds ratios
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Starting with categorical data, ending with a table of log odds ratios.

# Set doInstall to FALSE if you don't want any packages installed.
doInstall <- TRUE
toInstall <- c("plyr", "reshape2")
if (doInstall) {
  # Install only the packages that are not already available, so that
  # re-running the script does not download them again every time.
  isInstalled <- vapply(toInstall, requireNamespace, logical(1),
                        quietly = TRUE)
  notInstalled <- toInstall[!isInstalled]
  if (length(notInstalled) > 0) {
    install.packages(notInstalled, repos = "https://cran.us.r-project.org")
  }
}
# Attach each package; invisible() keeps lapply()'s return value from being
# printed at top level.
invisible(lapply(toInstall, library, character.only = TRUE))
# Canonical example of categorical data: the HairEyeColor contingency table,
# melted so that each cell becomes one row with its count in the 4th column.
HEC <- melt(HairEyeColor)
# Repeat every row as many times as its count, then drop the count column.
# seq_len() is used instead of 1:nrow() so an empty table yields no rows.
HEC <- HEC[rep(seq_len(nrow(HEC)), times = HEC[, 4]), -4]
colnames(HEC) <- c("Hair", "Eye", "Gender")
head(HEC) # This df has a row for each observation
# Convert the data frame of factors to a matrix of indicator variables.
# Supplying full (contrasts = FALSE) contrast matrices keeps one column for
# every level of every factor. contrasts.arg must be a named list, so lapply()
# is used: sapply() would simplify the result to a matrix whenever all factors
# happen to have the same number of levels.
# The first column (the intercept) is dropped; drop = FALSE keeps the result a
# matrix even if only one indicator column remains.
indicatorMatrix <- model.matrix(
  ~ .,
  data = HEC,
  contrasts.arg = lapply(HEC, contrasts, contrasts = FALSE)
)[, -1, drop = FALSE]
# (from http://stackoverflow.com/a/4569239/479554)
head(indicatorMatrix)
# Make a table of log odds ratios between categories.
# Each cross-product below is a category-by-category matrix of counts of
# observations, classified by membership in the row and column categories.
complementMatrix <- 1 - indicatorMatrix # 1 where the category is absent
TT <- crossprod(indicatorMatrix)                   # Has both (two true)
TF <- crossprod(indicatorMatrix, complementMatrix) # Has row, but not column
FT <- crossprod(complementMatrix, indicatorMatrix) # Has column, but not row
FF <- crossprod(complementMatrix)                  # Has neither
oddsRatios <- (TT / TF) / (FT / FF)
# Two levels of the same factor never co-occur (TT = 0), so those pairs get a
# log odds ratio of -Inf.
logOddsRatios <- log(oddsRatios)
# upper.tri indicates which items in a matrix are in the upper triangle.
# melt() lists the matrix cells in column-major order, the same order as
# as.vector(), so the flattened mask keeps each pair of categories once and
# leaves out the diagonal.
isUpperTriangle <- as.vector(upper.tri(logOddsRatios))
pairTable <- melt(logOddsRatios)[isUpperTriangle, ]
# Sort the pairs from the most negative to the most positive association.
arrange(pairTable, value)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment