Skip to content

Instantly share code, notes, and snippets.

@nickpettican
Created November 23, 2015 13:55
Show Gist options
  • Save nickpettican/f7c9aa53d3243e8b8c01 to your computer and use it in GitHub Desktop.
Save nickpettican/f7c9aa53d3243e8b8c01 to your computer and use it in GitHub Desktop.
Session04.R
# CHAPTER 7
# LINEAR REGRESSION
reg.data <- read.csv("c:\\MSc\\Statistics\\Data\\tannin.csv")
attach(reg.data)
names(reg.data)
plot(tannin,growth,pch=21,bg="blue")
# LINEAR REGRESSION IN R
lm(growth∼tannin)
abline(lm(growth∼tannin),col="green")
fitted <- predict(lm(growth∼tannin))
fitted
lines(c(0,0),c(12,11.755556))
for (i in 1:9)
lines (c(tannin[i],tannin[i]),c(growth[i],fitted[i]),col="red")
b <- seq(-1.43,-1,0.002)
sse <- numeric(length(b))
for (i in 1:length(b)) {
a <- mean(growth)-b[i]*mean(tannin)
residual <- growth - a - b[i]*tannin
sse[i] <- sum(residual^2)
}
plot(b,sse,type="l",ylim=c(19,24))
arrows(-1.216,20.07225,-1.216,19,col="red")
abline(h=20.07225,col="green",lty=2)
lines(b,sse)
b[which(sse==min(sse))]
# CALCULATIONS INVOLVED IN LINEAR REGRESSION
tannin
growth
tannin*growth
sum(tannin*growth)
SSX <- sum(tannin^2)-sum(tannin)^2/length(tannin)
SSX
SSY <- sum(growth^2)-sum(growth)^2/length(growth)
SSY
SSXY <- sum(tannin*growth)-sum(tannin)*sum(growth)/length(tannin)
SSXY
# PARTITIONING SUMS OF SQUARES IN REGRESSION: SSY = SSR + SSE
qf(0.95,1,7)
1-pf(30.974,1,7)
model <- lm(growth∼tannin)
summary(model)
summary.aov(model)
# MODEL CHECKING
par(mfrow=c(2,2))
plot(model)
# TRANSFORMATION
par(mfrow=c(1,1))
data <- read.csv("c:\\MSc\\Statistics\\Data\\decay.csv")
attach(data)
names(data)
plot(time,amount,pch=21,col="blue",bg="green")
abline(lm(amount∼time),col="red")
summary(lm(amount∼time))
plot(time,log(amount),pch=21,col="blue",bg="red")
abline(lm(log(amount)∼time),col="blue")
model <- lm(log(amount)∼time)
summary(model)
upper <- 4.547386 + 0.100295
lower <- 4.547386 - 0.100295
exp(upper)
exp(lower)
exp(4.547386)
par(mfrow=c(1,1))
plot(time,amount,pch=21,col="blue",bg="green")
xv <- seq(0,30,0.25)
yv <- 94.38536 * exp(-0.068528 * xv)
lines(xv,yv,col="red")
# POLYNOMIAL REGRESSION
par(mfrow=c(2,2))
curve(4+2*x-0.1*x^2,0,10,col="red",ylab="y")
curve(4+2*x-0.2*x^2,0,10,col="red",ylab="y")
curve(12-4*x+0.3*x^2,0,10,col="red",ylab="y")
curve(4+0.5*x+0.1*x^2,0,10,col="red",ylab="y")
model2 <- lm(amount∼time)
model3 <- lm(amount∼time+I(time^2))
summary(model3)
AIC(model2,model3)
anova(model2,model3)
# NON-LINEAR REGRESSION
exp(-Inf)
exp(-0)
deer <- read.csv("c:\\MSc\\Statistics\\Data\\jaws.csv")
attach(deer)
names(deer)
par(mfrow=c(1,1))
plot(age,bone,pch=21,bg="lightgrey")
model <- nls(bone∼a-b*exp(-c*age),start=list(a=120,b=110,c=0.064))
summary(model)
model2 <- nls(bone∼a*(1-exp(-c*age)),start=list(a=120,c=0.064))
anova(model,model2)
av <- seq(0,50,0.1)
bv <- predict(model2,list(age=av))
lines(av,bv,col="blue")
summary(model2)
null.model <- lm(bone ∼ 1)
summary.aov(null.model)
100*(59008-8923.72)/59008
# GENERALIZED ADDITIVE MODELS
library(mgcv)
hump <- read.csv("c:\\MSc\\Statistics\\Data\\hump.csv")
attach(hump)
names(hump)
model <- gam(y∼s(x))
plot(model,col="blue")
points(x,y-mean(y),pch=21,bg="red")
summary(model)
# INFLUENCE
x <- c(2,3,3,3,4)
y <- c(2,3,2,1,2)
windows(7,4)
par(mfrow=c(1,2))
plot(x,y,xlim=c(0,8),ylim=c(0,8))
x1 <- c(x,7)
y1 <- c(y,6)
plot(x1,y1,xlim=c(0,8),ylim=c(0,8))
abline(lm(y1∼x1),col="blue")
reg <- lm(y1∼x1)
influence.measures(reg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment