Last active
April 29, 2019 09:32
-
-
Save nickpettican/b1f145c0fb76349b1127 to your computer and use it in GitHub Desktop.
Session04
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CHAPTER 6: TWO SAMPLES
#
# The classical tests covered in this session. Each bare function name below
# is a valid R expression: run at the console it prints that function's
# definition. Use e.g. ?var.test to open the help page instead.

# - Comparing two variances: Fisher's F test
var.test
# - Comparing two sample means with normal errors: Student's t test
t.test
# - Comparing two means with non-normal errors: Wilcoxon's test
wilcox.test
# - Comparing two proportions: binomial test
prop.test
# - Comparing two variables: Pearson's or Spearman's rank correlation
cor.test
# - Testing for independence in contingency tables using chi-squared
chisq.test
# - Testing small samples for correlation with Fisher's exact test
fisher.test
# COMPARING TWO VARIANCES
# Before carrying out a test to compare two sample means we need to test
# whether the sample variances are significantly different, using Fisher's
# F test. The test statistic is the variance ratio, with the larger variance
# in the numerator.

# Critical value of F: the 0.975 quantile (two-tailed test at the 5% level)
# with 9 degrees of freedom in both the numerator and the denominator.
F.critical <- qf(0.975, df1 = 9, df2 = 9)
F.critical
# Read the data for the two gardens whose variances are compared.
f.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\f.test.data.csv")
names(f.test.data)

# Columns are reached through with() instead of attach()/detach(), so nothing
# is left on the search path and no global object can mask a column.

# The two sample variances.
with(f.test.data, var(gardenB))
with(f.test.data, var(gardenC))

# gardenC has the larger variance, so it goes in the numerator.
F.ratio <- with(f.test.data, var(gardenC) / var(gardenB))
F.ratio
# The variance in gardenC is more than 10 times as big as the variance in
# gardenB. The critical value is 4.026 (for 9 and 9 degrees of freedom, as
# the original script assumed); since the test statistic is larger than the
# critical value we reject the null hypothesis: the variances are
# significantly different.

# Two-tailed p-value for the observed ratio. The degrees of freedom are each
# sample size minus one, taken from the data rather than hard-coded as 9.
with(
  f.test.data,
  2 * (1 - pf(F.ratio, length(gardenC) - 1, length(gardenB) - 1))
)

# The built-in test gives the same p-value (it reports the ratio the other
# way up, gardenB / gardenC, because of the argument order).
with(f.test.data, var.test(gardenB, gardenC))
# COMPARING TWO MEANS
t.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\t.test.data.csv")
names(t.test.data)

# Copy the two samples into ordinary variables instead of attach()-ing the
# data frame; the following section also refers to gardenA and gardenB.
gardenA <- t.test.data$gardenA
gardenB <- t.test.data$gardenB
n.A <- length(gardenA)
n.B <- length(gardenB)

# Critical value of Student's t for a two-tailed test at the 5% level.
# The original hard-coded 18 degrees of freedom (10 + 10 - 2); here the
# value follows from the actual sample sizes.
df.pooled <- n.A + n.B - 2
qt(0.975, df.pooled)

# Notched box plot of the pooled readings, grouped by garden.
# Note: the formula operator must be the ASCII tilde "~"; the look-alike
# Unicode character U+223C is a syntax error in R.
ozone <- c(gardenA, gardenB)
label <- factor(rep(c("A", "B"), times = c(n.A, n.B)))
boxplot(ozone ~ label, notch = TRUE, xlab = "Garden",
        ylab = "Ozone pphm", col = "lightblue")

# Sample variances and their ratio (a quick check that they are similar).
s2A <- var(gardenA)
s2B <- var(gardenB)
s2A / s2B

# Student's t statistic by hand: difference between the means divided by
# the standard error of the difference.
t.stat <- (mean(gardenA) - mean(gardenB)) / sqrt(s2A / n.A + s2B / n.B)
t.stat

# Two-tailed p-value for the computed statistic (instead of a retyped,
# rounded value), followed by the built-in test for comparison.
2 * pt(-abs(t.stat), df.pooled)
t.test(gardenA, gardenB)
# WILCOXON RANK-SUM TEST
# gardenA and gardenB are the two samples loaded in the previous section.

# Pool the two samples and keep a parallel vector of garden labels; the
# group sizes come from the data rather than being hard-coded as 10.
ozone <- c(gardenA, gardenB)
ozone
label <- rep(c("A", "B"), times = c(length(gardenA), length(gardenB)))
label

# Rank the pooled values (ties receive the average rank), then sum the ranks
# within each garden.
combined.ranks <- rank(ozone)
combined.ranks
tapply(combined.ranks, label, sum)

# The built-in test.
wilcox.test(gardenA, gardenB)
# TEST ON PAIRED SAMPLES
streams <- read.csv("c:\\MSc\\Statistics\\Data\\streams.csv")
names(streams)

# Treating the two columns as independent samples ignores the pairing ...
with(streams, t.test(down, up))
# ... whereas the paired test works on the within-pair differences.
with(streams, t.test(down, up, paired = TRUE))

# The same paired test done by hand: a one-sample t test on the differences.
# d is up - down while the call above tests down - up, so the t statistic,
# mean and confidence interval have the opposite sign; the p-value is the same.
d <- streams$up - streams$down
t.test(d)
# THE BINOMIAL TEST
# Exact test of 1 success in 9 trials against the default null probability
# of 0.5.
binom.test(x = 1, n = 9)

# BINOMIAL TESTS TO COMPARE TWO PROPORTIONS
# Compares 4 successes out of 40 with 196 successes out of 3270.
prop.test(x = c(4, 196), n = c(40, 3270))
# CHI-SQUARED CONTINGENCY TABLES
# Critical value of chi-squared at the 5% level with 1 degree of freedom.
qchisq(0.95, df = 1)

# 2 x 2 table of counts; matrix() fills column by column, so the first
# column is (38, 14) and the second is (11, 51).
count <- matrix(c(38, 14, 11, 51), nrow = 2)
count

# Test of independence, first with the default Yates continuity correction
# and then without it.
chisq.test(count)
chisq.test(count, correct = FALSE)
# FISHER'S EXACT TEST

# Probability of one particular 2 x 2 table
#     n11  n12
#     n21  n22
# given its row and column totals: the product of the factorials of the four
# marginal totals divided by the product of the factorials of the four cells
# and of the grand total.
fisher.prob <- function(n11, n12, n21, n22) {
  row.totals <- c(n11 + n12, n21 + n22)
  col.totals <- c(n11 + n21, n12 + n22)
  prod(factorial(c(row.totals, col.totals))) /
    (prod(factorial(c(n11, n12, n21, n22))) * factorial(sum(row.totals)))
}

# The observed table (row totals 8 and 12, column totals 10 and 10) ...
p.observed <- fisher.prob(6, 2, 4, 8)
p.observed
# ... and the two more extreme tables with the same marginal totals.
p.next <- fisher.prob(7, 1, 3, 9)
p.next
p.extreme <- fisher.prob(8, 0, 2, 10)
p.extreme

# One-tailed probability: sum of the three computed values. Summing the
# computed values avoids retyping rounded numbers; the retyped third value in
# the original (0.000352279) did not match the computed 0.0003572279.
p.one.tail <- p.observed + p.next + p.extreme
p.one.tail
# Two-tailed probability: the column totals are equal, so the distribution
# is symmetric and the one-tailed value is simply doubled.
2 * p.one.tail

# The same table as a matrix (filled column by column) for the built-in test.
x <- matrix(c(6, 4, 2, 8), nrow = 2)
x
fisher.test(x)
# Fisher's exact test from raw data: one row per observation, with the two
# classifying columns tree and nests.
# NOTE(review): the variable name `table` is kept for compatibility but it
# shadows the name of base::table(); consider renaming it.
table <- read.csv("c:\\MSc\\Statistics\\Data\\fisher.csv")
head(table)
# with() looks the columns up inside the data frame, so attach() is not
# needed and nothing is added to the search path.
with(table, fisher.test(tree, nests))
# CORRELATION AND COVARIANCE
data <- read.csv("c:\\MSc\\Statistics\\Data\\twosample.csv")

# Everything is evaluated inside with(data, ...). A global object called x
# already exists at this point (the 2 x 2 matrix from the Fisher section),
# and after attach(data) that global x would mask the column x, so the
# plot and the variances below would silently use the wrong object.
with(data, plot(x, y, pch = 21, col = "blue", bg = "orange"))

# Variances of the two variables and their covariance.
with(data, var(x))
with(data, var(y))
with(data, var(x, y))

# Correlation by hand (covariance over the geometric mean of the two
# variances), then with the built-in function.
with(data, var(x, y) / sqrt(var(x) * var(y)))
with(data, cor(x, y))
# CORRELATION AND THE VARIANCE OF DIFFERENCES BETWEEN VARIABLES
# The file name previously ended in a stray space ("water.table.csv "),
# which does not name the intended file on most platforms.
paired <- read.csv("c:\\MSc\\Statistics\\Data\\water.table.csv")
names(paired)

# Correlation between the two columns, kept for reuse below instead of
# retyping a rounded value.
r.SW <- with(paired, cor(Summer, Winter))
r.SW
with(paired, cor.test(Summer, Winter))

# Variances of each column and of their difference.
varS <- with(paired, var(Summer))
varW <- with(paired, var(Winter))
varD <- with(paired, var(Summer - Winter))

# The correlation recovered from the three variances.
(varS + varW - varD) / (2 * sqrt(varS) * sqrt(varW))

# The variance of the difference is not the sum of the two variances ...
varD
varS + varW
# ... unless the covariance term 2 * r * sd(Summer) * sd(Winter) is subtracted.
varS + varW - 2 * r.SW * sqrt(varS) * sqrt(varW)
# SCALE-DEPENDENT CORRELATIONS
data <- read.csv("c:\\MSc\\Statistics\\Data\\productivity.csv")
names(data)

# All points together, with the rank correlation across the whole data set.
with(data, plot(productivity, mammals, pch = 16, col = "blue"))
with(data, cor.test(productivity, mammals, method = "spearman"))

# The same plot with one colour per region. Since R 4.0 read.csv() returns
# text columns as character, and as.numeric() on a character vector gives NA
# (so no points would be drawn); a character column is therefore converted
# to a factor first. Factor or numeric columns are used exactly as before.
region.code <- with(
  data,
  as.numeric(if (is.character(region)) factor(region) else region)
)
with(data, plot(productivity, mammals, pch = 16, col = region.code))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment