Created
February 17, 2015 17:33
-
-
Save mmparker/f52363e523a0d0fe1570 to your computer and use it in GitHub Desktop.
Quick illustration of variable transformations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a quick script to illustrate how to go about transforming variables
# for statistical analysis and the effects of some basic transformations.
# I'm by no means an expert on transformations, so be sure to read up on
# how to best apply the transformations!

# These two packages are for demonstrating the transformations -
# not necessary for the transformations themselves.
library(reshape2)
library(ggplot2)

# A sample dataset ----
# 100 random draws per variable, keyed by a row id. The draws are not seeded,
# so the exact values (and the plot) differ from run to run.
dat <- data.frame(
  id = seq_len(100),
  a = rnorm(100, mean = 0, sd = 1),   # Standard normal
  b = rnorm(100, mean = 20, sd = 5),  # Shifted & more variable
  c = rlnorm(100)                     # Log-normal
)

# Add the transformed variables ----
# If you really have a lot of variables to transform, it might be easier to
# loop over the columns with lapply().
# A few different transformations to try:
#   - scale() calculates z-scores (centre on the mean, divide by the standard
#     deviation); good for standardizing normal-ish variables
#   - log() is the standard transformation for positive, right-skewed
#     distributions
dat <- transform(
  dat,
  a_scale = scale(a),
  b_scale = scale(b),
  c_log = log(c)
)

# Reshape for plotting ----
# Just for plotting purposes, melt the data so that each value gets one row:
# every non-id column is stacked into a `variable` / `value` pair.
# `id.vars` is spelled out in full rather than relying on partial argument
# matching.
dat_melt <- melt(dat, id.vars = "id")

# So the differences can be plotted, add two variables indicating the original
# variable and the transformation status of each record.
# The first character of the column name identifies the source variable
# ("a_scale" -> "a", "c_log" -> "c").
dat_melt$variable_original <- substr(dat_melt$variable, 1, 1)

# Only the untouched columns are named exactly "a", "b" or "c"; everything
# else in `variable` is a transformed column.
dat_melt$transformed <- ifelse(
  dat_melt$variable %in% c("a", "b", "c"),
  yes = "Original",
  no = "Transformed"
)

# Plot ----
# One density per source variable (rows), original vs. transformed (columns).
ggplot(dat_melt, aes(x = value, fill = variable_original)) +
  geom_density(alpha = 0.2) +  # Alpha less than 1 makes objects transparent
  facet_grid(variable_original ~ transformed) +
  theme_bw()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment