# Source: GitHub Gist aaronsaunders/6191693
# Author: @aaronsaunders
# Created: August 9, 2013 07:05
# run a quick demonstration
# Install Hmisc only when it is missing, so re-running the script does not
# reinstall a package that may already be loaded in this session.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc", dependencies = TRUE)
}
# Brings every installed package up to date.
update.packages()
library("Hmisc")
# Read the Vermont SPSS file, converting value labels to factor levels.
mydata <- spss.get(
  "http://www.rci.rutgers.edu/~rwomack/R/vermont.sav",
  use.value.labels = TRUE
)
head(mydata)
tail(mydata)
summary(mydata)
# with() evaluates hist(AGEP) inside mydata without leaving the data frame
# on the search path (the original attach() here was never detached).
with(mydata, hist(AGEP))
# review data editor functions
# Start from a one-row data frame with a single `id` column, then open it in
# the data editor; fix() stores the edited result back in `quickdata`.
quickdata <- data.frame(id = 1)
fix(quickdata)
# load a simple csv file
statepop <- read.csv(
  "http://www.rci.rutgers.edu/~rwomack/R/statepop.csv",
  header = TRUE
)
# Show the whole table.
statepop

# Summary statistics using explicit column access.
mean(statepop[["Population"]])
sd(statepop[["Population"]])

# Put the columns on the search path so they can be used by bare name;
# later lines of this script rely on `Population` being visible this way.
attach(statepop)
mean(Population)
sd(Population)

# Quick looks at the distribution and the raw values.
hist(Population)
plot(Population)

# List the objects currently in the workspace.
ls()
#a basic function (Pearson's second coefficient of skewness)
#' Pearson's second coefficient of skewness.
#'
#' Computes 3 * (mean - median) / standard deviation.
#'
#' @param x A numeric vector.
#' @param na.rm Should missing values be removed before computing? Defaults
#'   to `FALSE`, which keeps the original behaviour (any `NA` yields `NA`).
#' @return A single numeric value. The result is `NaN` or infinite when the
#'   standard deviation of `x` is zero, and `NA` when `x` has fewer than two
#'   usable values.
pskew <- function(x, na.rm = FALSE) {
  centre_gap <- mean(x, na.rm = na.rm) - median(x, na.rm = na.rm)
  (3 * centre_gap) / sd(x, na.rm = na.rm)
}
# Pass the column explicitly instead of relying on attach(statepop).
pskew(statepop$Population)
#or can use a function to group output and access later
#' Bundle the mean, median and Pearson skewness of a vector.
#'
#' @param x A numeric vector.
#' @param na.rm Should missing values be dropped before computing? Defaults
#'   to `FALSE`, which keeps the original behaviour.
#' @return A named list with elements `mean`, `median` and `skew`; `skew` is
#'   whatever `pskew(x)` returns.
mystats <- function(x, na.rm = FALSE) {
  # Drop NAs once up front so all three statistics see the same values.
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  list(
    mean = mean(x),
    median = median(x),
    skew = pskew(x)
  )
}
# Keep the grouped statistics in a list and pull one element out by name.
mylist <- mystats(statepop[["Population"]])
mylist[["skew"]]

# saving your work
# Writes every object in the workspace to a file in the working directory.
save.image(file = "statepop.RData")
# working with indexes and datastructures
mode(statepop)
class(statepop)

# Build a one-row data frame and show the table with that row appended
# (the result is printed, not stored).
newstate <- data.frame(State = "Puerto Rico", Population = 4000000)
rbind(statepop, newstate)

statepop[, 1]      # first column, as a vector
statepop[1, ]      # first row, as a data frame
statepop["State"]  # one column by name, still a data frame
# The original asked for row 1, column 10, but the rbind() above only works
# if statepop has exactly the two columns State and Population, so column 10
# does not exist. NOTE(review): showing the first ten rows is assumed to be
# the intent -- confirm.
statepop[1:10, ]

# Logical indexing, using the column explicitly instead of an attached name.
statepop[statepop$Population > 10000000, ]
statepop$Population > 10000000
statepop[!(statepop$Population > 10000000), ]
# clean up workspace
detach(statepop)
# Also detach mydata if an earlier attach(mydata) is still on the search
# path, so that attaching it again later does not stack a second copy.
if ("mydata" %in% search()) {
  detach(mydata)
}
rm(statepop)
ls()
# exploring a larger dataset - Vermont PUMS
# Read the SPSS file with Hmisc's spss.get(), turning value labels into
# factor levels.
mydata <- spss.get(
  "http://www.rci.rutgers.edu/~rwomack/R/vermont.sav",
  use.value.labels = TRUE
)

# Three views of the same object: per-column summaries, its attributes,
# and its compact structure.
summary(mydata)
attributes(mydata)
str(mydata)
#subset
# Select the five variables of interest directly by name; this replaces an
# attach()/detach() round trip that was used only to reach the columns.
subdata <- mydata[c("AGEP", "SEX", "MAR", "SCHL", "WAGP")]
# The rest of the script refers to these columns by bare name, so the subset
# is put on the search path here.
attach(subdata)
names(subdata)
print(subdata)
summary(subdata)
attributes(subdata)
str(subdata)
#start analyzing
# Frequency tables for the two categorical variables.
table(MAR)
table(SEX)

# Histograms of age: default bins, about eight bins, then a density-scaled
# version with a kernel density estimate drawn over it.
hist(AGEP)
hist(AGEP, breaks = 8)
hist(AGEP, breaks = 8, probability = TRUE)
lines(density(AGEP))

# Normal Q-Q plot of wages, then wages split by sex.
qqnorm(WAGP)
boxplot(WAGP ~ SEX)
stripchart(WAGP ~ SEX, method = "jitter")
#add a transformed variable
# Copy subdata and add the natural log of wages as a new column.
newdata <- transform(subdata, logwage = log(WAGP))
# Point the formula at newdata with `data =` instead of attaching a second
# data frame whose columns would mask the ones already attached.
stripchart(logwage ~ SEX, data = newdata, method = "jitter")
# Per-group summary of every column, grouped by SEX.
by(newdata, newdata["SEX"], summary)
#statistical tests
# One-sample t-test of wages against a mean of 200000, 99% interval.
t.test(WAGP, mu = 200000, conf.level = 0.99)

# Simple regression of wage on age: fit once, keep the model, print it.
myreg <- lm(WAGP ~ AGEP)
myreg
# Same response with schooling added as a second predictor (printed only).
lm(WAGP ~ AGEP + SCHL)

# Inspect the stored model.
summary(myreg)
anova(myreg)
names(myreg)
print(coef(myreg))

# Correlation test between wage and age.
cor.test(WAGP, AGEP)
# NOTE(review): WAGP looks like a continuous wage amount, so this
# cross-tabulates each distinct wage against MAR -- confirm that is the
# intended pair of variables.
chisq.test(WAGP, MAR)
#add-on packages
# Install Hmisc only if it is not already available; reinstalling a package
# that may already be loaded in this session is unnecessary.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library("Hmisc")
# Hmisc's overview functions for a data frame.
contents(newdata)
describe(newdata)
# Remove Hmisc from the search path again.
detach(package:Hmisc)
#a few graphs
# barchart() is a lattice function. Load lattice explicitly rather than
# relying on another package having attached it earlier.
library("lattice")
plot(AGEP, WAGP)
barchart(MAR)
pie(table(MAR))
interaction.plot(SEX, MAR, WAGP)
# Diagnostic plots for the regression fitted earlier.
plot(myreg)
# Scatterplot with the fitted regression line. The original called
# lm(x ~ y), but no x or y exists in this script; the line must be the
# regression of the plotted y variable (WAGP) on the plotted x (AGEP).
plot(AGEP, WAGP)
abline(lm(WAGP ~ AGEP))
# `horizontal` takes a logical value, not the string "FALSE".
barchart(MAR, horizontal = FALSE, ylab = "number", main = "Marital Status")
barplot(
  table(SEX, MAR),
  beside = TRUE,
  ylab = "number",
  main = "Marital Status by sex",
  col = c("slateblue", "peachpuff")
)
# NOTE(review): the labels assume SEX's levels are ordered Male, Female --
# confirm against levels(SEX).
legend("topright", c("Male", "Female"), fill = c("slateblue", "peachpuff"))
#multiple panels
# Switch to a 2 x 2 grid of plots. par() returns the previous settings, which
# are kept so the layout can be put back afterwards.
old_par <- par(mfrow = c(2, 2))
pie(table(MAR))
interaction.plot(SEX, MAR, WAGP)
plot(AGEP, WAGP)
abline(lm(WAGP ~ AGEP))
barplot(
  table(SEX, MAR),
  beside = TRUE,
  ylab = "number",
  main = "Marital Status by sex",
  col = c("slateblue", "peachpuff")
)
# NOTE(review): the labels assume SEX's levels are ordered Male, Female --
# confirm against levels(SEX).
legend("topright", c("Male", "Female"), fill = c("slateblue", "peachpuff"))
# Restore the previous layout so later plots are not squeezed into the grid,
# and drop the helper object so it is not saved with the workspace.
par(old_par)
rm(old_par)
# installing packages, GUI interfaces
# Several packages must be passed as ONE character vector. As separate
# arguments, the second string is taken as `lib`, the install directory.
install.packages(c("Rcmdr", "rattle"), dependencies = TRUE)
update.packages()
library("Rcmdr")
# NOTE(review): the Rcmdr package's launcher is Commander(); there does not
# appear to be an Rcmdr() function, and loading the package normally opens
# the GUI already -- confirm against the Rcmdr documentation. Uncomment the
# next line only to reopen the GUI after its window has been closed.
# Commander()
detach(package:Rcmdr)
library("rattle")
rattle()
detach(package:rattle)
#ending a session
# Write the whole workspace to a file in the working directory, then leave R.
save.image(file = "vermont.RData")
quit()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment