Last active
July 15, 2016 16:11
-
-
Save lecy/a783b44d0e902e2e97103c7253b3034c to your computer and use it in GitHub Desktop.
Example of basic regression functions in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Regression in R"
output:
  html_notebook: default
---
# create some fake data

```{r}
# three correlated predictors and an outcome driven by x1 and x2
x1 <- 1:100
x2 <- -0.1 * x1 + rnorm(100)
x3 <- 0.05 * x2 + rnorm(100)
y  <- 2 * x1 + 10 * rnorm(100) + 10 * x2

dat <- data.frame( y, x1, x2, x3 )
head( dat )
```
# descriptive statistics

```{r}
library( pastecs )

# transpose so variables are rows and statistics are columns
print( t( stat.desc( dat ) ), digits=3 )

# To copy and paste into Excel:
#
#   descriptives <- t( stat.desc(dat) )
#
#   write.table( descriptives, "clipboard", sep="\t", row.names=TRUE )
```
```{r}
# To create nicely formatted tables for markdown documents use the kable() function
library( knitr )

# keep only n, min, max, median, mean, and std.dev rows
keep <- c( 1, 4, 5, 8, 9, 13 )
kable( t( stat.desc( dat )[ keep, ] ), format="markdown", digits=3 )
```
|   | nbr.val|     min|     max| median|   mean| std.dev|
|:--|-------:|-------:|-------:|------:|------:|-------:|
|y  |     100| -11.210| 118.497| 50.591| 52.395|  31.342|
|x1 |     100|   1.000| 100.000| 50.500| 50.500|  29.011|
|x2 |     100| -10.851|   1.071| -5.097| -4.964|   3.135|
|x3 |     100|  -2.548|   1.857| -0.409| -0.380|   1.060|
# pretty pairs plot

Convenient visual descriptives:

```{r}
# scatterplot matrix of every pair of variables
pairs( dat )
```
This one is better: | |
```{r} | |
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
  # Upper-panel function for pairs(): prints |r| with significance stars.
  #   x, y    - the two variables of the current panel
  #   digits  - significant digits shown for the correlation
  #   prefix  - text pasted in front of the correlation
  #   cex.cor - optional text size for the significance stars

  # save the panel's user coordinates and restore them on exit
  # (BUG FIX: the original `on.exit(par(usr))` passed the saved values
  # positionally and never restored them; par(usr = usr) does)
  usr <- par("usr"); on.exit(par(usr = usr))
  par(usr = c(0, 1, 0, 1))

  r <- abs(cor(x, y))
  txt <- format(c(r, 0.123456789), digits=digits)[1]  # pad so digit count is stable
  txt <- paste(prefix, txt, sep="")

  # BUG FIX: the original only defined `cex` when cex.cor was missing,
  # so supplying cex.cor caused "object 'cex' not found" below
  if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)

  test <- cor.test(x,y)
  # significance codes borrowed from printCoefmat
  Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
                cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                symbols = c("***", "**", "*", ".", " "))

  text(0.5, 0.5, txt, cex = 1.5 )
  text(.7, .8, Signif, cex=cex.cor, col=2)
}
# correlations above the diagonal, smoothed scatterplots below
pairs( dat, lower.panel = panel.smooth, upper.panel = panel.cor )
```
# create some fake regression data

```{r}
x <- 1:100
y <- 2*x + rnorm(100,0,20)
plot( x, y )

# random state factor for the dummy-variable examples below
# (use TRUE rather than T: T is reassignable and can silently break code)
dum <- sample( c("NJ","NY","MA","PA"), 100, replace=TRUE )
```
# basic regression syntax

```{r}
lm( y ~ x )           # fit and auto-print the model

m.01 <- lm( y ~ x )   # store the model object for later chunks
summary( m.01 )       # full regression table
```
# nice visual diagnostics

```{r}
# four standard diagnostic plots on a 2x2 grid;
# save the old graphics settings and restore them afterwards
# so the mfrow change does not leak into later chunks
old.par <- par( mfrow=c(2,2) )
plot( m.01 )
par( old.par )
```
# useful model fit functions

```{r}
coefficients( m.01 )          # model coefficients
confint( m.01, level=0.95 )   # CIs for model parameters

# not run because of long output
# anova( m.01 )       # anova table
# fitted( m.01 )      # predicted values
# residuals( m.01 )   # residuals
# influence( m.01 )   # regression diagnostics
```
```{r, warning=FALSE}
# coefficient plot with confidence bands
# (chunk option spelled out as FALSE, not F, to match the chunk below)
library( coefplot )
m.02 <- lm( y ~ x1 + I(x1^2) + x2 + x3 )
coefplot(m.02)
```
# pretty output

```{r, warning=FALSE}
# install.packages( "memisc" )
library( memisc )

m.02 <- lm( y ~ x + I(x^2) )   # quadratic term
m.03 <- lm( y ~ x - 1 )        # no intercept term

# side-by-side regression table for all three models
pretty.table <- mtable( "Model 1"=m.01, "Model 2"=m.02, "Model 3"=m.03,
                        summary.stats=c("R-squared","F","p","N") )
pretty.table
```
# specification

```{r}
summary( lm( y ~ x1 + x2 + x3 ) )

# add different functional forms

# square x1
summary( lm( y ~ x1 + x1^2 + x2 + x3 ) )      # not right
summary( lm( y ~ x1 + I(x1^2) + x2 + x3 ) )   # like this

summary( lm( y ~ log(x1) + x2 + x3 ) )        # log of x1 in formula works fine
```
```{r}
# interactions
summary( lm( y ~ x1 + x2 ) )
summary( lm( y ~ x1 + x2 + I(x1*x2) ) )   # explicit interaction term
summary( lm( y ~ x1*x2 ) )                # shortcut: main effects + interaction
```
```{r}
# dummy variables
summary( lm( y ~ x1 + x2 + x3 + dum ) )       # drop one level (reference group)
summary( lm( y ~ x1 + x2 + x3 + dum - 1 ) )   # keep all levels, drop intercept
```
# standardized regression coefficients (beta)

```{r}
# install.packages( "lm.beta" )
library( lm.beta )

m.01.beta <- lm.beta( m.01 )
summary( m.01.beta )

# coef( m.01.beta )
```
```{r}
# note the standard error is not standardized -
# it describes the regular coefficients, not the standardized ones
summary( m.01 )
```
# or just use the formula: | |
```{r} | |
lm.beta <- function( my.mod )
{
  # Standardized (beta) coefficients for a fitted lm: b * sd(x) / sd(y).
  #   my.mod - an lm object with numeric predictors
  # Returns a named numeric vector, one beta per predictor.
  # NOTE: this definition masks lm.beta::lm.beta() loaded earlier.

  b <- summary(my.mod)$coef[-1, 1]

  # BUG FIX: sd() no longer accepts a data frame (removed in R 3.0.0),
  # so the original failed for any model with two or more predictors.
  # Take the sd of each predictor column; drop=FALSE keeps a data frame
  # even when there is only a single predictor.
  sx <- sapply( my.mod$model[ , -1, drop=FALSE ], sd )
  sy <- sd( my.mod$model[ , 1 ] )

  beta <- b * sx/sy
  return(beta)
}
coefficients( m.01 )   # raw (unstandardized) coefficients
lm.beta( m.01 )        # standardized coefficients
```
# robust standard errors

```{r, warning=FALSE}
# BUG FIX: the package name was misspelled "sandwhich" in the original
# comment; copying it verbatim would make install.packages() fail.
# install.packages( "sandwich" )
# install.packages( "lmtest" )
library(sandwich)
library(lmtest)

m.01 <- lm( y ~ x )

# REGULAR STANDARD ERRORS
summary( m.01 )   # not-robust
```
```{r}
# ROBUST STANDARD ERRORS
# reproduce the Stata default
coeftest( m.01, vcov=vcovHC(m.01,"HC1") )   # robust; HC1 (Stata default)
```
```{r}
# ROBUST STANDARD ERRORS
# check that "sandwich" returns HC0
coeftest( m.01, vcov = sandwich )              # robust; sandwich
coeftest( m.01, vcov = vcovHC(m.01, "HC0") )   # robust; HC0
```
```{r}
# ROBUST STANDARD ERRORS
# check that the default robust var-cov matrix is HC3
coeftest( m.01, vcov = vcovHC(m.01) )          # robust; HC3
coeftest( m.01, vcov = vcovHC(m.01, "HC3") )   # robust; HC3 (default)
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment