jalapic · March 2, 2015 20:28
diff --git a/permtest b/permtest
 ### Permutation Test Example... #based on:  http://spark.rstudio.com/ahmed/permutation/

 # Group A
 a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118)

 # Group B
 b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116)


 df <- 
  data.frame(
  values = c(a,b),
  group = c(rep("A", length(a)), rep("B", length(b)))
  )

 df

 ### Graph it...

 library(ggplot2)

 ggplot(df, aes(group, values, color=group)) +
  geom_boxplot(aes(color=group, fill=group), alpha=.1) +
  geom_point(aes(color=group), size=3) +
  theme_bw()

 #or, 
 ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw()


 ## what you'd probably do....

 t.test(a, b)  #ok then  - normality ?  - equal variances ?

 shapiro.test(a) 
 shapiro.test(b)  #ok, now what ?

 x <- list(a, b)
 x

 bartlett.test(x)

 t.test(a, b, var.equal=T) #ok, now what ?



 ### Randomization Tests !


 # Combine the two datasets into a single dataset
 # i.e., under the null hypothesis, there is no difference between the two groups
 combined = c(a,b)

 # Observed difference
 diff.observed = mean(b) - mean(a)


 ###  Permute one time.

 ### Shuffle
 shuffled <- sample (combined, length(combined))

 # Reallocate groups
 a.random <- shuffled[1 : length(a)]
 b.random <- shuffled[(length(a) + 1) : length(combined)]

 a.random
 b.random

 diff.random[i] = mean(b.random) - mean(a.random)




 ### Do lots of times ...

 nperms = 1000

 diff.random = NULL
 for (i in 1 : nperms) {
  
  # Sample from the combined dataset without replacement
  shuffled <- sample(combined)
  
  a.random <- shuffled[1 : length(a)]
  b.random <- shuffled[(length(a) + 1) : length(combined)]
  
  # Null (permuated) difference
  diff.random[i] = mean(b.random) - mean(a.random)
 }

 # P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference

 pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms
 pvalue


 ### graph results

 ggplot(df, aes(x=diff.random)) + 
  geom_histogram(binwidth=.5, colour="black", fill="white") +
  theme_bw() +
  geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1)
	### Permutation Test Example... #based on: http://spark.rstudio.com/ahmed/permutation/

	# Group A
	a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118)

	# Group B
	b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116)


	df <-
	data.frame(
	values = c(a,b),
	group = c(rep("A", length(a)), rep("B", length(b)))
	)

	df

	### Graph it...

	library(ggplot2)

	ggplot(df, aes(group, values, color=group)) +
	geom_boxplot(aes(color=group, fill=group), alpha=.1) +
	geom_point(aes(color=group), size=3) +
	theme_bw()

	#or,
	ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw()


	## what you'd probably do....

	t.test(a, b) #ok then - normality ? - equal variances ?

	shapiro.test(a)
	shapiro.test(b) #ok, now what ?

	x <- list(a, b)
	x

	bartlett.test(x)

	t.test(a, b, var.equal=T) #ok, now what ?



	### Randomization Tests !


	# Combine the two datasets into a single dataset
	# i.e., under the null hypothesis, there is no difference between the two groups
	combined = c(a,b)

	# Observed difference
	diff.observed = mean(b) - mean(a)


	### Permute one time.

	### Shuffle
	shuffled <- sample (combined, length(combined))

	# Reallocate groups
	a.random <- shuffled[1 : length(a)]
	b.random <- shuffled[(length(a) + 1) : length(combined)]

	a.random
	b.random

	diff.random[i] = mean(b.random) - mean(a.random)




	### Do lots of times ...

	nperms = 1000

	diff.random = NULL
	for (i in 1 : nperms) {

	# Sample from the combined dataset without replacement
	shuffled <- sample(combined)

	a.random <- shuffled[1 : length(a)]
	b.random <- shuffled[(length(a) + 1) : length(combined)]

	# Null (permuated) difference
	diff.random[i] = mean(b.random) - mean(a.random)
	}

	# P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference

	pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms
	pvalue


	### graph results

	ggplot(df, aes(x=diff.random)) +
	geom_histogram(binwidth=.5, colour="black", fill="white") +
	theme_bw() +
	geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1)