nickpettican · April 29, 2019 09:32
diff --git a/stats04 b/stats04
 # CHAPTER 6:

 # TWO SAMPLES

 # The classical tests covered in this script. Each bullet is followed by the
 # name of the R function that implements it.
 # NOTE: the bare function names below are not calls. Evaluated interactively
 # at top level they only auto-print the function's source; no test is run.
 # - Comparing two variances: Fisher's F test
 var.test
 # - Comparing two sample means with normal errors: Student's t test
 t.test
 # - Comparing two means with non-normal errors: Wilcoxon's test
 wilcox.test
 # - Comparing two proportions: binomial proportions test
 prop.test
 # - Comparing two variables: Pearson's or Spearman's rank correlation
 cor.test
 # - Testing for independence in contingency tables using chi-squared
 chisq.test
 # - Testing small samples for correlation with Fisher's exact test
 fisher.test

 # COMPARING TWO VARIANCES

 # Before comparing two sample means we check whether the two sample variances
 # are significantly different, using Fisher's F test.
 # The test statistic is the variance ratio with the larger variance on top.

 f.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\f.test.data.csv")
 names(f.test.data)

 # The columns are read straight from the data frame instead of attach()ing it,
 # so nothing is left on the search path to mask (or be masked by) other objects.
 var.B <- var(f.test.data$gardenB)
 var.C <- var(f.test.data$gardenC)
 var.B
 var.C
 # this computes the two variances
 # gardenC has the larger variance, so it goes on top of the ratio

 # Degrees of freedom are n - 1 per sample; derived from the data rather than
 # hard-coded as 9.
 df.B <- length(f.test.data$gardenB) - 1
 df.C <- length(f.test.data$gardenC) - 1

 # Critical value of Fisher's F for a two-tailed test at alpha = 0.05
 # (numerator d.f. first, then denominator d.f.)
 qf(0.975, df.C, df.B)

 F.ratio <- var.C / var.B
 F.ratio
 # the ratio shows that the variance in gardenC is more than 10 times as big as the variance in gardenB
 # with 9 and 9 d.f. the critical value is 4.026; since the test statistic is larger
 # than the critical value we reject the null hypothesis
 # the variances are significantly different

 # Two-tailed p-value for the observed variance ratio
 2 * (1 - pf(F.ratio, df.C, df.B))

 # Built-in equivalent. The larger-variance sample is passed first so that the
 # reported F equals F.ratio; the two-sided p-value does not depend on the order.
 var.test(f.test.data$gardenC, f.test.data$gardenB)

 # COMPARING TWO MEANS

 t.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\t.test.data.csv")
 names(t.test.data)

 # Work on explicit copies of the two columns instead of attach()ing the data
 # frame. The copies use names that differ from the column names so they cannot
 # mask a column of the same name from any attached data frame.
 garden.A <- t.test.data$gardenA
 garden.B <- t.test.data$gardenB
 n.A <- length(garden.A)
 n.B <- length(garden.B)

 # Critical value of Student's t (two-tailed, alpha = 0.05) with
 # n.A + n.B - 2 degrees of freedom
 df.t <- n.A + n.B - 2
 qt(0.975, df.t)

 ozone <- c(garden.A, garden.B)
 label <- factor(c(rep("A", n.A), rep("B", n.B)))
 # The formula operator must be the ASCII tilde "~"
 boxplot(ozone ~ label, notch = TRUE, xlab = "Garden",
   ylab = "Ozone pphm", col = "lightblue")

 s2A <- var(garden.A)
 s2B <- var(garden.B)

 # Variance ratio: check that the variances are similar before comparing means
 s2A / s2B

 # Student's t statistic: difference between the means divided by the
 # standard error of the difference
 t.stat <- (mean(garden.A) - mean(garden.B)) / sqrt(s2A / n.A + s2B / n.B)
 t.stat

 # Two-tailed p-value, computed from the statistic itself rather than from a
 # hand-copied value
 2 * pt(-abs(t.stat), df.t)

 t.test(garden.A, garden.B)

 # WILCOXON RANK-SUM TEST

 ozone <- c(garden.A, garden.B)
 ozone

 label <- c(rep("A", n.A), rep("B", n.B))
 label

 # Rank all observations together, then sum the ranks within each garden
 combined.ranks <- rank(ozone)
 combined.ranks

 tapply(combined.ranks, label, sum)

 wilcox.test(garden.A, garden.B)

 # TEST ON PAIRED SAMPLES

 streams <- read.csv("c:\\MSc\\Statistics\\Data\\streams.csv")
 names(streams)

 # Columns are referenced through the data frame; attach() is avoided because
 # it was never undone and leaves `down`/`up` on the search path.

 # Two-sample test that ignores the pairing (for comparison)
 t.test(streams$down, streams$up)

 # Paired test; TRUE is spelled out because T is an ordinary, reassignable variable
 t.test(streams$down, streams$up, paired = TRUE)

 # The same test as a one-sample t test on the differences. The differences are
 # taken as up - down, so the sign of t is reversed relative to the paired call.
 d <- streams$up - streams$down
 t.test(d)

 # THE BINOMIAL TEST

 # Exact binomial test for 1 success out of 9 trials
 # (default null hypothesis: probability of success = 0.5)
 binom.test(x = 1, n = 9)

 # BINOMIAL TESTS TO COMPARE TWO PROPORTIONS

 # Compare 4 successes out of 40 with 196 successes out of 3270
 prop.test(x = c(4, 196), n = c(40, 3270))

 # CHI-SQUARED CONTINGENCY TABLES

 # Observed counts. matrix() fills column by column, so column 1 is (38, 14)
 # and column 2 is (11, 51).
 count <- matrix(c(38, 14, 11, 51), nrow = 2)
 count

 # Critical value of chi-squared at alpha = 0.05. The degrees of freedom are
 # (rows - 1) * (columns - 1), i.e. 1 for this 2 x 2 table.
 qchisq(0.95, (nrow(count) - 1) * (ncol(count) - 1))

 # Default: Yates' continuity correction is applied to 2 x 2 tables
 chisq.test(count)

 # Without the continuity correction. FALSE is spelled out because F is an
 # ordinary variable that can be reassigned.
 chisq.test(count, correct = FALSE)

 # FISHER'S EXACT TEST

 # Probability of one particular 2 x 2 table with cells
 #   a b
 #   c d
 # given its row and column totals:
 #   (a+b)! (c+d)! (a+c)! (b+d)! / (a! b! c! d! n!)
 fisher.table.prob <- function(a, b, c, d) {
   n <- a + b + c + d
   factorial(a + b) * factorial(c + d) * factorial(a + c) * factorial(b + d) /
     (factorial(a) * factorial(b) * factorial(c) * factorial(d) * factorial(n))
 }

 # Observed table (6, 2 / 4, 8)
 p.observed <- fisher.table.prob(6, 2, 4, 8)
 p.observed

 # The two more extreme tables with the same margins
 p.more.extreme <- fisher.table.prob(7, 1, 3, 9)
 p.more.extreme

 p.most.extreme <- fisher.table.prob(8, 0, 2, 10)
 p.most.extreme

 # One-tailed probability: summed from the computed values instead of from
 # hand-copied decimals (the previously typed third term, 0.000352279, did not
 # match its own expression, which evaluates to 0.000357228).
 p.one.tail <- p.observed + p.more.extreme + p.most.extreme
 p.one.tail

 # Two-tailed probability
 2 * p.one.tail

 # The same table as a matrix (filled column by column). It is not called `x`
 # because a global `x` would mask any data-frame column named x that is
 # reached through attach() later in the session.
 fisher.counts <- matrix(c(6, 4, 2, 8), nrow = 2)
 fisher.counts

 fisher.test(fisher.counts)

 # Fisher's exact test on raw data: two categorical columns, one row per case.
 # The data frame is not named `table` (that would shadow base::table) and is
 # not attach()ed; the columns are referenced explicitly.
 fisher.data <- read.csv("c:\\MSc\\Statistics\\Data\\fisher.csv")
 head(fisher.data)

 fisher.test(fisher.data$tree, fisher.data$nests)

 # CORRELATION AND COVARIANCE

 twosample.data <- read.csv("c:\\MSc\\Statistics\\Data\\twosample.csv")

 # The columns are extracted explicitly. With attach(), a global object named
 # `x` (for example a matrix created earlier in the session) masks the attached
 # column `x`, and every call below would silently use the wrong object.
 sample.x <- twosample.data$x
 sample.y <- twosample.data$y

 # Axis labels are given explicitly so they still read "x" and "y"
 plot(sample.x, sample.y, xlab = "x", ylab = "y",
   pch = 21, col = "blue", bg = "orange")

 # Variance of each variable
 var(sample.x)

 var(sample.y)

 # var() with two arguments returns their covariance
 var(sample.x, sample.y)

 # Correlation coefficient: covariance divided by the geometric mean of the variances
 var(sample.x, sample.y) / sqrt(var(sample.x) * var(sample.y))

 # Built-in equivalent
 cor(sample.x, sample.y)

 # CORRELATION AND THE VARIANCE OF DIFFERENCES BETWEEN VARIABLES

 # The stray trailing space inside the file name has been removed
 paired <- read.csv("c:\\MSc\\Statistics\\Data\\water.table.csv")
 names(paired)

 # Columns are referenced through the data frame rather than via attach()
 r.SW <- cor(paired$Summer, paired$Winter)
 r.SW

 cor.test(paired$Summer, paired$Winter)

 varS <- var(paired$Summer)
 varW <- var(paired$Winter)
 varD <- var(paired$Summer - paired$Winter)

 # The correlation recovered from the three variances
 (varS + varW - varD) / (2 * sqrt(varS) * sqrt(varW))

 # Variance of the differences ...
 varD

 # ... is not simply the sum of the two variances ...
 varS + varW

 # ... but the sum minus 2 * r * sA * sB. r is taken from cor() above instead
 # of a hand-copied constant.
 varS + varW - 2 * r.SW * sqrt(varS) * sqrt(varW)

 # SCALE-DEPENDENT CORRELATIONS

 # Not named `data` (that shadows utils::data) and not attach()ed
 productivity.data <- read.csv("c:\\MSc\\Statistics\\Data\\productivity.csv")
 names(productivity.data)

 # All regions pooled
 plot(productivity.data$productivity, productivity.data$mammals,
   xlab = "productivity", ylab = "mammals", pch = 16, col = "blue")

 cor.test(productivity.data$productivity, productivity.data$mammals,
   method = "spearman")

 # Same scatterplot, one colour per region. Since R 4.0 read.csv() keeps text
 # columns as character, and as.numeric() on character labels yields NA (no
 # points drawn), so the column is converted to a factor first and its integer
 # codes are used as colours.
 # NOTE(review): assumes `region` holds category labels - confirm against the file.
 plot(productivity.data$productivity, productivity.data$mammals,
   xlab = "productivity", ylab = "mammals", pch = 16,
   col = as.numeric(factor(productivity.data$region)))
	# CHAPTER 6:

	# TWO SAMPLES

	# The classical tests covered in this script. Each bullet is followed by the
	# name of the R function that implements it.
	# NOTE: the bare function names below are not calls. Evaluated interactively
	# at top level they only auto-print the function's source; no test is run.
	# - Comparing two variances: Fisher's F test
	var.test
	# - Comparing two sample means with normal errors: Student's t test
	t.test
	# - Comparing two means with non-normal errors: Wilcoxon's test
	wilcox.test
	# - Comparing two proportions: binomial proportions test
	prop.test
	# - Comparing two variables: Pearson's or Spearman's rank correlation
	cor.test
	# - Testing for independence in contingency tables using chi-squared
	chisq.test
	# - Testing small samples for correlation with Fisher's exact test
	fisher.test

	# COMPARING TWO VARIANCES

	# Before comparing two sample means we check whether the two sample variances
	# are significantly different, using Fisher's F test.
	# The test statistic is the variance ratio with the larger variance on top.

	f.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\f.test.data.csv")
	names(f.test.data)

	# The columns are read straight from the data frame instead of attach()ing it,
	# so nothing is left on the search path to mask (or be masked by) other objects.
	var.B <- var(f.test.data$gardenB)
	var.C <- var(f.test.data$gardenC)
	var.B
	var.C
	# this computes the two variances
	# gardenC has the larger variance, so it goes on top of the ratio

	# Degrees of freedom are n - 1 per sample; derived from the data rather than
	# hard-coded as 9.
	df.B <- length(f.test.data$gardenB) - 1
	df.C <- length(f.test.data$gardenC) - 1

	# Critical value of Fisher's F for a two-tailed test at alpha = 0.05
	# (numerator d.f. first, then denominator d.f.)
	qf(0.975, df.C, df.B)

	F.ratio <- var.C / var.B
	F.ratio
	# the ratio shows that the variance in gardenC is more than 10 times as big as the variance in gardenB
	# with 9 and 9 d.f. the critical value is 4.026; since the test statistic is larger
	# than the critical value we reject the null hypothesis
	# the variances are significantly different

	# Two-tailed p-value for the observed variance ratio
	2 * (1 - pf(F.ratio, df.C, df.B))

	# Built-in equivalent. The larger-variance sample is passed first so that the
	# reported F equals F.ratio; the two-sided p-value does not depend on the order.
	var.test(f.test.data$gardenC, f.test.data$gardenB)

	# COMPARING TWO MEANS

	t.test.data <- read.csv("c:\\MSc\\Statistics\\Data\\t.test.data.csv")
	names(t.test.data)

	# Work on explicit copies of the two columns instead of attach()ing the data
	# frame. The copies use names that differ from the column names so they cannot
	# mask a column of the same name from any attached data frame.
	garden.A <- t.test.data$gardenA
	garden.B <- t.test.data$gardenB
	n.A <- length(garden.A)
	n.B <- length(garden.B)

	# Critical value of Student's t (two-tailed, alpha = 0.05) with
	# n.A + n.B - 2 degrees of freedom
	df.t <- n.A + n.B - 2
	qt(0.975, df.t)

	ozone <- c(garden.A, garden.B)
	label <- factor(c(rep("A", n.A), rep("B", n.B)))
	# The formula operator must be the ASCII tilde "~"
	boxplot(ozone ~ label, notch = TRUE, xlab = "Garden",
	  ylab = "Ozone pphm", col = "lightblue")

	s2A <- var(garden.A)
	s2B <- var(garden.B)

	# Variance ratio: check that the variances are similar before comparing means
	s2A / s2B

	# Student's t statistic: difference between the means divided by the
	# standard error of the difference
	t.stat <- (mean(garden.A) - mean(garden.B)) / sqrt(s2A / n.A + s2B / n.B)
	t.stat

	# Two-tailed p-value, computed from the statistic itself rather than from a
	# hand-copied value
	2 * pt(-abs(t.stat), df.t)

	t.test(garden.A, garden.B)

	# WILCOXON RANK-SUM TEST

	ozone <- c(garden.A, garden.B)
	ozone

	label <- c(rep("A", n.A), rep("B", n.B))
	label

	# Rank all observations together, then sum the ranks within each garden
	combined.ranks <- rank(ozone)
	combined.ranks

	tapply(combined.ranks, label, sum)

	wilcox.test(garden.A, garden.B)

	# TEST ON PAIRED SAMPLES

	streams <- read.csv("c:\\MSc\\Statistics\\Data\\streams.csv")
	names(streams)

	# Columns are referenced through the data frame; attach() is avoided because
	# it was never undone and leaves `down`/`up` on the search path.

	# Two-sample test that ignores the pairing (for comparison)
	t.test(streams$down, streams$up)

	# Paired test; TRUE is spelled out because T is an ordinary, reassignable variable
	t.test(streams$down, streams$up, paired = TRUE)

	# The same test as a one-sample t test on the differences. The differences are
	# taken as up - down, so the sign of t is reversed relative to the paired call.
	d <- streams$up - streams$down
	t.test(d)

	# THE BINOMIAL TEST

	# Exact binomial test for 1 success out of 9 trials
	# (default null hypothesis: probability of success = 0.5)
	binom.test(x = 1, n = 9)

	# BINOMIAL TESTS TO COMPARE TWO PROPORTIONS

	# Compare 4 successes out of 40 with 196 successes out of 3270
	prop.test(x = c(4, 196), n = c(40, 3270))

	# CHI-SQUARED CONTINGENCY TABLES

	# Observed counts. matrix() fills column by column, so column 1 is (38, 14)
	# and column 2 is (11, 51).
	count <- matrix(c(38, 14, 11, 51), nrow = 2)
	count

	# Critical value of chi-squared at alpha = 0.05. The degrees of freedom are
	# (rows - 1) * (columns - 1), i.e. 1 for this 2 x 2 table.
	qchisq(0.95, (nrow(count) - 1) * (ncol(count) - 1))

	# Default: Yates' continuity correction is applied to 2 x 2 tables
	chisq.test(count)

	# Without the continuity correction. FALSE is spelled out because F is an
	# ordinary variable that can be reassigned.
	chisq.test(count, correct = FALSE)

	# FISHER'S EXACT TEST

	# Probability of one particular 2 x 2 table with cells
	#   a b
	#   c d
	# given its row and column totals:
	#   (a+b)! (c+d)! (a+c)! (b+d)! / (a! b! c! d! n!)
	# (The hand-typed expressions this replaces had lost their `*` operators
	# and no longer parsed.)
	fisher.table.prob <- function(a, b, c, d) {
	  n <- a + b + c + d
	  factorial(a + b) * factorial(c + d) * factorial(a + c) * factorial(b + d) /
	    (factorial(a) * factorial(b) * factorial(c) * factorial(d) * factorial(n))
	}

	# Observed table (6, 2 / 4, 8)
	p.observed <- fisher.table.prob(6, 2, 4, 8)
	p.observed

	# The two more extreme tables with the same margins
	p.more.extreme <- fisher.table.prob(7, 1, 3, 9)
	p.more.extreme

	p.most.extreme <- fisher.table.prob(8, 0, 2, 10)
	p.most.extreme

	# One-tailed probability: summed from the computed values instead of from
	# hand-copied decimals (the previously typed third term, 0.000352279, did not
	# match its own expression, which evaluates to 0.000357228).
	p.one.tail <- p.observed + p.more.extreme + p.most.extreme
	p.one.tail

	# Two-tailed probability
	2 * p.one.tail

	# The same table as a matrix (filled column by column). It is not called `x`
	# because a global `x` would mask any data-frame column named x that is
	# reached through attach() later in the session.
	fisher.counts <- matrix(c(6, 4, 2, 8), nrow = 2)
	fisher.counts

	fisher.test(fisher.counts)

	# Fisher's exact test on raw data: two categorical columns, one row per case.
	# The data frame is not named `table` (that would shadow base::table) and is
	# not attach()ed; the columns are referenced explicitly.
	fisher.data <- read.csv("c:\\MSc\\Statistics\\Data\\fisher.csv")
	head(fisher.data)

	fisher.test(fisher.data$tree, fisher.data$nests)

	# CORRELATION AND COVARIANCE

	twosample.data <- read.csv("c:\\MSc\\Statistics\\Data\\twosample.csv")

	# The columns are extracted explicitly. With attach(), a global object named
	# `x` (for example a matrix created earlier in the session) masks the attached
	# column `x`, and every call below would silently use the wrong object.
	sample.x <- twosample.data$x
	sample.y <- twosample.data$y

	# Axis labels are given explicitly so they still read "x" and "y"
	plot(sample.x, sample.y, xlab = "x", ylab = "y",
	  pch = 21, col = "blue", bg = "orange")

	# Variance of each variable
	var(sample.x)

	var(sample.y)

	# var() with two arguments returns their covariance
	var(sample.x, sample.y)

	# Correlation coefficient: covariance divided by the geometric mean of the variances
	var(sample.x, sample.y) / sqrt(var(sample.x) * var(sample.y))

	# Built-in equivalent
	cor(sample.x, sample.y)

	# CORRELATION AND THE VARIANCE OF DIFFERENCES BETWEEN VARIABLES

	# The stray trailing space inside the file name has been removed
	paired <- read.csv("c:\\MSc\\Statistics\\Data\\water.table.csv")
	names(paired)

	# Columns are referenced through the data frame rather than via attach()
	r.SW <- cor(paired$Summer, paired$Winter)
	r.SW

	cor.test(paired$Summer, paired$Winter)

	varS <- var(paired$Summer)
	varW <- var(paired$Winter)
	varD <- var(paired$Summer - paired$Winter)

	# The correlation recovered from the three variances. The multiplication
	# operators are written out; `2sqrt(varS)sqrt(varW)` does not parse.
	(varS + varW - varD) / (2 * sqrt(varS) * sqrt(varW))

	# Variance of the differences ...
	varD

	# ... is not simply the sum of the two variances ...
	varS + varW

	# ... but the sum minus 2 * r * sA * sB. r is taken from cor() above instead
	# of a hand-copied constant.
	varS + varW - 2 * r.SW * sqrt(varS) * sqrt(varW)

	# SCALE-DEPENDENT CORRELATIONS

	# Not named `data` (that shadows utils::data) and not attach()ed
	productivity.data <- read.csv("c:\\MSc\\Statistics\\Data\\productivity.csv")
	names(productivity.data)

	# All regions pooled
	plot(productivity.data$productivity, productivity.data$mammals,
	  xlab = "productivity", ylab = "mammals", pch = 16, col = "blue")

	cor.test(productivity.data$productivity, productivity.data$mammals,
	  method = "spearman")

	# Same scatterplot, one colour per region. Since R 4.0 read.csv() keeps text
	# columns as character, and as.numeric() on character labels yields NA (no
	# points drawn), so the column is converted to a factor first and its integer
	# codes are used as colours.
	# NOTE(review): assumes `region` holds category labels - confirm against the file.
	plot(productivity.data$productivity, productivity.data$mammals,
	  xlab = "productivity", ylab = "mammals", pch = 16,
	  col = as.numeric(factor(productivity.data$region)))