Skip to content

Instantly share code, notes, and snippets.

@nickpettican
Last active April 29, 2019 09:32
Show Gist options
  • Save nickpettican/b1f145c0fb76349b1127 to your computer and use it in GitHub Desktop.
Save nickpettican/b1f145c0fb76349b1127 to your computer and use it in GitHub Desktop.
Session04
# CHAPTER 6:
# TWO SAMPLES
# The classical tests covered in this session, each followed by the R function
# that performs it. NOTE: every function name below is a bare expression, so
# running these lines at the console prints that function's definition; use
# e.g. ?var.test to open its help page instead.
# - Comparing two variances: Fisher's F test
var.test
# - Comparing two sample means with normal errors: Student's t test
t.test
# - Comparing two means with non-normal errors: Wilcoxon's test
wilcox.test
# - Comparing two proportions: binomial test
prop.test
# - Comparing two variables: Pearson's or Spearman's rank correlation
cor.test
# - Testing for independence in contingency tables using chi-squared
chisq.test
# - Testing small samples for correlation with Fisher's exact test
fisher.test
# COMPARING TWO VARIANCES
# Before comparing two sample means we test whether the two sample variances
# are significantly different, using Fisher's F test. The test statistic is the
# ratio of the two variances with the larger variance on top.
f.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\f.test.data.csv")
# Degrees of freedom (n - 1 per garden) are taken from the data rather than
# hard-coded; the course data set is expected to give 9 and 9.
df.B <- length(f.test.data$gardenB) - 1
df.C <- length(f.test.data$gardenC) - 1
# Critical value of F for a two-tailed test at alpha = 0.05
# (4.026 when both degrees of freedom are 9).
qf(0.975, df.C, df.B)
names(f.test.data)
# The two sample variances. Columns are reached through with() instead of
# attach(), so nothing is left on (or masked along) the search path.
with(f.test.data, var(gardenB))
with(f.test.data, var(gardenC))
# gardenC has the larger variance, so it goes in the numerator.
F.ratio <- with(f.test.data, var(gardenC) / var(gardenB))
F.ratio
# For the course data the variance in gardenC is more than 10 times the
# variance in gardenB. Because the test statistic is larger than the critical
# value we reject the null hypothesis: the variances are significantly
# different.
# Two-tailed p value; lower.tail = FALSE avoids the rounding loss of 1 - pf().
2 * pf(F.ratio, df.C, df.B, lower.tail = FALSE)
# The same test using the built-in function.
with(f.test.data, var.test(gardenB, gardenC))
# COMPARING TWO MEANS
# Student's t test worked by hand and then with t.test().
t.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\t.test.data.csv")
# Copy the two columns out explicitly instead of attach()ing the data frame;
# the Wilcoxon section further down refers to gardenA and gardenB by name.
gardenA <- t.test.data$gardenA
gardenB <- t.test.data$gardenB
# Sample sizes and pooled degrees of freedom come from the data
# (10, 10 and 18 for the course data set).
n.A <- length(gardenA)
n.B <- length(gardenB)
df.t <- n.A + n.B - 2
# Critical value of t for a two-tailed test at alpha = 0.05.
qt(0.975, df.t)
names(t.test.data)
ozone <- c(gardenA, gardenB)
label <- factor(c(rep("A", n.A), rep("B", n.B)))
# The formula operator must be the ASCII tilde "~"; the look-alike character
# U+223C is not valid R and stops the whole script from parsing.
boxplot(ozone ~ label, notch = TRUE, xlab = "Garden",
        ylab = "Ozone pphm", col = "lightblue")
s2A <- var(gardenA)
s2B <- var(gardenB)
# Variance ratio: a quick check that the two variances are similar.
s2A / s2B
# Test statistic: difference between the means divided by the standard error
# of the difference.
t.stat <- (mean(gardenA) - mean(gardenB)) / sqrt(s2A / n.A + s2B / n.B)
t.stat
# Two-tailed p value from the computed statistic (not a retyped constant);
# -abs() makes it independent of which garden has the larger mean.
2 * pt(-abs(t.stat), df.t)
t.test(gardenA, gardenB)
# WILCOXON RANK-SUM TEST
# Pool the readings from both gardens, rank the pooled values, and total the
# ranks within each garden; wilcox.test() then runs the test itself.
# gardenA and gardenB come from the t test data loaded in the previous section.
ozone <- c(gardenA, gardenB)
ozone
# Ten "A" labels followed by ten "B" labels, matching the order of ozone.
label <- rep(c("A", "B"), each = 10)
label
combined.ranks <- rank(x = ozone)
combined.ranks
# Sum of ranks per garden.
tapply(X = combined.ranks, INDEX = label, FUN = sum)
wilcox.test(gardenA, gardenB)
# TEST ON PAIRED SAMPLES
streams <- read.csv("c:\\MSc\\Statistics\\Data\\streams.csv")
names(streams)
# Columns are evaluated inside with() rather than attach()ed, so the data
# frame is not left on the search path for the rest of the script.
# First, treating the two columns as independent samples...
with(streams, t.test(down, up))
# ...then as paired samples (TRUE spelled out: T is an ordinary variable that
# can be reassigned).
with(streams, t.test(down, up, paired = TRUE))
# A one-sample t test on the within-pair differences is the same analysis as
# the paired test above.
d <- with(streams, up - down)
t.test(d)
# THE BINOMIAL TEST
# Exact test of 1 success in 9 trials against the default null probability
# of 0.5.
binom.test(1, 9)
# BINOMIAL TESTS TO COMPARE TWO PROPORTIONS
# 4 successes out of 40 in the first group versus 196 out of 3270 in the
# second.
prop.test(c(4, 196), c(40, 3270))
# CHI-SQUARED CONTINGENCY TABLES
# Critical value of chi-squared at alpha = 0.05 with 1 degree of freedom.
qchisq(0.95, 1)
# 2 x 2 table of counts, filled column by column.
count <- matrix(c(38, 14, 11, 51), nrow = 2)
count
# With Yates' continuity correction (the default for 2 x 2 tables)...
chisq.test(count)
# ...and without it. FALSE is spelled out because F is an ordinary variable
# that can be reassigned.
chisq.test(count, correct = FALSE)
# FISHER'S EXACT TEST
# Probability of one particular 2 x 2 table with fixed margins: the product
# of the factorials of the four marginal totals, divided by the product of the
# factorials of the four cell counts and of the grand total. Here the row
# totals are 8 and 12, the column totals are 10 and 10, and the grand total
# is 20.
margin.product <- factorial(8) * factorial(12) * factorial(10) * factorial(10)
# The observed table (cells 6, 2, 4, 8).
p.observed <- margin.product /
  (factorial(6) * factorial(2) * factorial(4) * factorial(8) * factorial(20))
p.observed
# The next more extreme table with the same margins (cells 7, 3, 1, 9).
p.more.extreme <- margin.product /
  (factorial(7) * factorial(3) * factorial(1) * factorial(9) * factorial(20))
p.more.extreme
# The most extreme table with the same margins (cells 8, 2, 0, 10).
p.most.extreme <- margin.product /
  (factorial(8) * factorial(2) * factorial(0) * factorial(10) * factorial(20))
p.most.extreme
# One-tailed probability: sum the computed values instead of retyping them.
# The retyped third term used previously (0.000352279) had lost a digit; the
# expression above evaluates to 0.0003572279.
p.one.tail <- p.observed + p.more.extreme + p.most.extreme
p.one.tail
# Two-tailed probability.
2 * p.one.tail
# The same test with the built-in function, on the observed table
# (filled column by column).
x <- matrix(c(6, 4, 2, 8), nrow = 2)
x
fisher.test(x)
# Fisher's exact test on raw observations: one row per case, with the two
# classifying variables in the columns tree and nests.
# NOTE(review): the variable name `table` shadows base::table() for variable
# lookup (function calls still resolve to the base function); kept unchanged
# in case other code refers to it.
table <- read.csv("c:\\MSc\\Statistics\\Data\\fisher.csv")
head(table)
# Evaluated inside with() rather than after attach(), so the columns are not
# left on the search path.
with(table, fisher.test(tree, nests))
# CORRELATION AND COVARIANCE
data <- read.csv("c:\\MSc\\Statistics\\Data\\twosample.csv")
# The columns x and y are evaluated inside with() rather than after attach().
# This matters here: a global matrix called x already exists from the Fisher's
# exact test section, and a global object masks an attached column of the same
# name, so the attach() version would have used the wrong x.
with(data, plot(x, y, pch = 21, col = "blue", bg = "orange"))
# Variances of x and y, then their covariance.
with(data, var(x))
with(data, var(y))
with(data, var(x, y))
# Correlation by hand: covariance divided by the geometric mean of the two
# variances...
with(data, var(x, y) / sqrt(var(x) * var(y)))
# ...and with the built-in function.
with(data, cor(x, y))
# CORRELATION AND THE VARIANCE OF DIFFERENCES BETWEEN VARIABLES
# (the stray trailing space has been removed from the file name)
paired <- read.csv("c:\\MSc\\Statistics\\Data\\water.table.csv")
names(paired)
# Keep the correlation in a variable so it can be reused below instead of
# being retyped from the printed output.
r.SW <- with(paired, cor(Summer, Winter))
r.SW
with(paired, cor.test(Summer, Winter))
varS <- with(paired, var(Summer))
varW <- with(paired, var(Winter))
varD <- with(paired, var(Summer - Winter))
# The correlation recovered from the three variances.
(varS + varW - varD) / (2 * sqrt(varS) * sqrt(varW))
# The variance of the difference is not the sum of the two variances...
varD
varS + varW
# ...unless the covariance term 2 * r * sd(S) * sd(W) is subtracted.
varS + varW - 2 * r.SW * sqrt(varS) * sqrt(varW)
# SCALE-DEPENDENT CORRELATIONS
data <- read.csv("c:\\MSc\\Statistics\\Data\\productivity.csv")
names(data)
# Columns are evaluated inside with() rather than after a second attach(data),
# which would stack another copy of `data` on the search path.
with(data, plot(productivity, mammals, pch = 16, col = "blue"))
with(data, cor.test(productivity, mammals, method = "spearman"))
# Colour the points by region. Since R 4.0 read.csv() leaves text columns as
# character, and as.numeric() on text gives NA, so the column is converted to
# a factor first. A column that is already numeric is used unchanged.
# NOTE(review): assumes region is a grouping column - confirm against the file.
with(data, plot(productivity, mammals, pch = 16,
                col = if (is.numeric(region)) region else as.numeric(factor(region))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment