Created
March 2, 2015 20:28
-
-
Save jalapic/ce2ce00a4e014c2259f3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Permutation Test Example... #based on: http://spark.rstudio.com/ahmed/permutation/ | |
# Group A | |
a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118) | |
# Group B | |
b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116) | |
df <- | |
data.frame( | |
values = c(a,b), | |
group = c(rep("A", length(a)), rep("B", length(b))) | |
) | |
df | |
### Graph it... | |
library(ggplot2) | |
ggplot(df, aes(group, values, color=group)) + | |
geom_boxplot(aes(color=group, fill=group), alpha=.1) + | |
geom_point(aes(color=group), size=3) + | |
theme_bw() | |
#or, | |
ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw() | |
## what you'd probably do.... | |
t.test(a, b) #ok then - normality ? - equal variances ? | |
shapiro.test(a) | |
shapiro.test(b) #ok, now what ? | |
x <- list(a, b) | |
x | |
bartlett.test(x) | |
t.test(a, b, var.equal=T) #ok, now what ? | |
### Randomization Tests ! | |
# Combine the two datasets into a single dataset | |
# i.e., under the null hypothesis, there is no difference between the two groups | |
combined = c(a,b) | |
# Observed difference | |
diff.observed = mean(b) - mean(a) | |
### Permute one time. | |
### Shuffle | |
shuffled <- sample (combined, length(combined)) | |
# Reallocate groups | |
a.random <- shuffled[1 : length(a)] | |
b.random <- shuffled[(length(a) + 1) : length(combined)] | |
a.random | |
b.random | |
diff.random[i] = mean(b.random) - mean(a.random) | |
### Do lots of times ... | |
nperms = 1000 | |
diff.random = NULL | |
for (i in 1 : nperms) { | |
# Sample from the combined dataset without replacement | |
shuffled <- sample(combined) | |
a.random <- shuffled[1 : length(a)] | |
b.random <- shuffled[(length(a) + 1) : length(combined)] | |
# Null (permuated) difference | |
diff.random[i] = mean(b.random) - mean(a.random) | |
} | |
# P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference | |
pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms | |
pvalue | |
### graph results | |
ggplot(df, aes(x=diff.random)) + | |
geom_histogram(binwidth=.5, colour="black", fill="white") + | |
theme_bw() + | |
geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment