Skip to content

Instantly share code, notes, and snippets.

@geofferyzh
Created May 23, 2012 16:48
Show Gist options
  • Save geofferyzh/2776306 to your computer and use it in GitHub Desktop.
Save geofferyzh/2776306 to your computer and use it in GitHub Desktop.
R in Action - Basic Statistics & Grouped Statistics
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
# R in Action - Basic Statistics
# - Descriptive Statistics
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
# Install the packages this script relies on, but only the ones that are not
# already present, so re-running the script does not re-download everything.
# Wrapped in local() so the helper variables do not leak into the workspace.
# NOTE(review): 'npmc' may no longer be available from CRAN; if so,
# install.packages() will warn that it is unavailable - confirm and drop it
# from the list if it is not needed.
local({
  wanted <- c("npmc", "ggm", "gmodels", "vcd", "Hmisc", "pastecs", "psych",
              "doBy", "reshape")
  # system.file() returns "" for a package that is not installed.
  present <- vapply(
    wanted,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  to_install <- wanted[!present]
  if (length(to_install) > 0) {
    install.packages(to_install)
  }
})
##############################################
##############################################
# -------- Descriptive Statistics ---------- #
##############################################
##############################################
# Columns of mtcars that the examples below work with.
vars <- c("mpg", "hp", "wt")
# Peek at the first rows of just those columns.
head(mtcars[, vars])
#############################
### A. Simple Descriptive Stats
#############################
# 1 - descriptive stats via summary()
summary(mtcars[, vars])
# 2 - descriptive stats via sapply()
# Descriptive statistics for a single numeric vector.
#
# Arguments:
#   x        values to summarise.
#   na.omit  when TRUE, NA entries are removed from x before anything is
#            computed; when FALSE (the default) x is used as given.
#
# Returns a named numeric vector with the elements
#   n         number of values used,
#   mean      arithmetic mean,
#   stdev     standard deviation as returned by sd(),
#   skew      sum of cubed deviations over stdev^3, divided by n,
#   kurtosis  sum of fourth-power deviations over stdev^4, divided by n,
#             minus 3.
mystats <- function(x, na.omit = FALSE) {
  if (na.omit) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  m <- mean(x)
  s <- sd(x)
  dev <- x - m
  c(
    n = n,
    mean = m,
    stdev = s,
    skew = sum(dev^3 / s^3) / n,
    kurtosis = sum(dev^4 / s^4) / n - 3
  )
}
# Column-wise means of the selected variables.
sapply(mtcars[vars], mean)
# Full set of statistics per column, dropping NAs first. mystats() works on a
# single numeric vector: handing it the whole data frame would pool mpg, hp
# and wt into one sample, so it is applied column by column instead.
sapply(mtcars[vars], mystats, na.omit = TRUE)
# The same per-column statistics collected three different ways.
# lapply(): a list with one named statistics vector per column.
a <- lapply(mtcars[vars], mystats)
# sapply(): the per-column vectors simplified into a matrix.
b <- sapply(mtcars[vars], mystats)
# apply() over columns; the data frame is converted to a matrix explicitly
# rather than relying on apply()'s implicit coercion.
# NOTE(review): the variable name `c` shadows base::c as a value; kept as-is
# in case other code refers to it.
c <- apply(as.matrix(mtcars[vars]), 2, mystats)
# 3 - Descriptive statistics (Hmisc package)
library(Hmisc)
# Hmisc and psych both export describe(). Qualify the call so the Hmisc
# version is used regardless of which package was attached most recently
# (e.g. when this script is re-run in a session where psych is already loaded).
Hmisc::describe(mtcars[vars])
# 4 - Descriptive statistics (pastecs package)
library(pastecs)
stat.desc(mtcars[vars])
# 5 - Descriptive statistics (psych package)
library(psych)
# Qualified for the same reason as the Hmisc call above.
psych::describe(mtcars[vars])
###################################
### B. Descriptive Stats BY GROUP
###################################
# 6 - Descriptive statistics by group with aggregate()
#     (each call applies one single-value function)
# Group means of the selected columns by transmission type (am).
aggregate(x = mtcars[vars], by = list(am = mtcars[["am"]]), FUN = mean)
# Group standard deviations of the same columns.
aggregate(x = mtcars[vars], by = list(am = mtcars[["am"]]), FUN = sd)
# 7 - Descriptive statistics by group via by() # allows multiple functions per call
# Column-wise mean and standard deviation of a data frame (or list) x.
# Returns one combined vector: the means first, then the standard deviations,
# with names prefixed "mean." and "sd." respectively.
dstats <- function(x) {
  col_means <- sapply(x, mean)
  col_sds <- sapply(x, sd)
  c(mean = col_means, sd = col_sds)
}
# Apply dstats() to the selected columns separately for each value of am.
by(data = mtcars[vars], INDICES = mtcars$am, FUN = dstats)
# 8 - Summary statistics by group (doBy package)
library(doBy)
# Apply mystats() to mpg, hp and wt within each level of am.
summaryBy(
  formula = mpg + hp + wt ~ am,
  data = mtcars,
  FUN = mystats
)
# 9 - Summary statistics by group (psych package)
library(psych)
# describe.by() is the old, deprecated name; describeBy() is its replacement
# in psych. Qualified with psych:: so the call does not depend on attach order.
psych::describeBy(mtcars[vars], group = mtcars$am)
# 10 Summary statistics by group (reshape package)
library(reshape)
# Count, mean and standard deviation of a vector x, returned as a named
# numeric vector with elements n, mean and sd.
# Note: this replaces the data-frame version of dstats() defined earlier in
# the script.
dstats <- function(x) {
  count <- length(x)
  avg <- mean(x)
  spread <- sd(x)
  c(n = count, mean = avg, sd = spread)
}
# Reshape mtcars to long format: am and cyl are kept as identifiers, and
# mpg, hp and wt are stacked as the measured variables.
dfm <- melt(
  mtcars,
  id.vars = c("am", "cyl"),
  measure.vars = c("mpg", "hp", "wt")
)
# Summarise each am / cyl / variable combination with dstats().
cast(dfm, am + cyl + variable ~ ., fun.aggregate = dstats)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment