Skip to content

Instantly share code, notes, and snippets.

@jacksonpradolima
Last active April 24, 2017 22:32
Show Gist options
  • Save jacksonpradolima/9f4b5def641ef61504c8b32c87c409c8 to your computer and use it in GitHub Desktop.
Save jacksonpradolima/9f4b5def641ef61504c8b32c87c409c8 to your computer and use it in GitHub Desktop.
Kruskal-Wallist test with post-hoc using Nemenyi test with Critical Difference. Chi-squared is applied automatically when ties are present, otherwise Tukey is used as default.
#' @title Tests for multiple comparisons - Kruskal-Wallis with Post-hoc (when necessary)
#'
#' @description This function is a wrapper to multiple comparison tests.
#' @param value
#' @param group
#' @param data
#' @param alpha
kruskal.test.with.post.hoc <- function(value, group, data, alpha = 0.05, notch = FALSE, omm = FALSE)
{
print("If you would like export the data frames to LaTeX, we recommend to install and to use 'xtable'.")
print("Check if you missing the packages 'graph' and 'Rgraphviz'. Try to install them using bioconductor")
#source("http://bioconductor.org/biocLite.R")
#biocLite(c("graph","Rgraphviz"))
# Loading needed packages
if(!require(DescTools))
{
print("You are missing the package 'DescTools', we will now try to install it...")
install.packages("DescTools")
library(DescTools)
}
if(!require(Hmisc))
{
print("You are missing the package 'Hmisc', we will now try to install it...")
install.packages("Hmisc")
library(Hmisc)
}
if(!require(scmamp))
{
print("You are missing the package 'scmamp', we will now try to install it...")
install.packages("scmamp")
library(scmamp)
}
if(!require(PMCMR))
{
print("You are missing the package 'PMCMR', we will now try to install it...")
install.packages("PMCMR")
library(PMCMR)
}
if(!require(pgirmess))
{
print("You are missing the package 'pgirmess', we will now try to install it...")
install.packages("pgirmess")
library(pgirmess)
}
if(!require(effsize))
{
print("You are missing the package 'effsize', we will now try to install it...")
install.packages("effsize")
library(effsize)
}
options(scipen=999)
pre.results <- kruskal.test(value ~ group, data=data)
chi_squared <- round(pre.results$statistic, 3)
p.value <- round(pre.results$p.value, 6)
degree.freed <- pre.results$parameter
### {Begin} plot's
#dev.off()
p <- ifelse(p.value < 0.001, "< 0.001", ifelse(p.value < 0.01, "< 0.01", ifelse(p.value < 0.05, "< 0.05",round(p.value, 3))))
boxplot(value ~ group, data = data, notch = notch)
stripchart(value ~ group, vertical = TRUE, data = data, method = "jitter", add = TRUE, pch = 16, col = 'black', cex = 0.5)
# Jittered BoxPlots (Gráfico de dispersão)
title(main="", sub=paste("Kruskal-Wallis chi-squared=", chi_squared, ", df=", degree.freed ,", p=", p), cex.sub=0.8)
if (omm==TRUE) {
abline(h=mean(value), lty=2, col="red")
abline(h=median(value), lty=3, col="blue")
}
### {End} plot's
# If the Kruskal-Wallis test is significant, a post-hoc analysis can be performed
# to determine which levels of the independent variable differ from each other level.
if(p.value < alpha){
#Nemenyi test for multiple comparisons
#Zar (2010) suggests that the Nemenyi test is not appropriate for groups with unequal numbers of observations.
post.results <- tryCatch({
posthoc.kruskal.nemenyi.test(value, group, "Tukey")
}, warning=function(w) {
## do something about the warning, maybe return 'NA'
#message("handling warning: ", conditionMessage(w))
posthoc.kruskal.nemenyi.test(value, group, "Chisquare")
})
## LaTeX formated: Significances highlighted in bold
## Pay attention! Read from left to right the comparations. See the TRUE values!!!
no.diff <- post.results$p.value < alpha
no.diff[is.na(no.diff)] <- FALSE
writeTabular(table=post.results$p.value, format='f', bold=no.diff,hrule=0,vrule=0)
### Multiple comparison test between treatments or treatments versus control after Kruskal-Wallis test ###
# This test helps determining which groups are different with pairwise comparisons adjusted
# appropriately for multiple comparisons. Those pairs of groups which have observed differences
# higher than a critical value are considered statistically different at a given significance level.
kruskal.mc <- kruskalmc(value ~ group, probs=alpha, data=data, cont=NULL)
groupnames <- paste0(unique(data[,1]))
sizeNames <- length(groupnames)
df <- data.frame(matrix(ncol = sizeNames, nrow= sizeNames), row.names = groupnames)
colnames(df) <- groupnames
X <- split(data, group)
## vector
for(name in names(X)){
for(name2 in names(X)){
if(!identical(name, name2)){
effsize <- VD.A(cbind(data[data$Group %in% c(name),]$Value), cbind(data[data$Group %in% c(name2),]$Value))
df[name, name2] <- paste0(round(effsize$estimate,4), " (", paste0(effsize$magnitude), ")")
}
}
}
list.to.return <- list(Kruskal = pre.results, Nemenyi = post.results, KruskalMC = kruskal.mc, DiffTable = no.diff, EffectSize = df)
return(list.to.return)
}
else{
print("The results where not significant. There is no need for a post-hoc test.")
list.to.return <- list(Kruskal = pre.results, Nemenyi = NULL, KruskalMC = NULL, DiffTable = NULL, EffectSize = NULL)
return(list.to.return)
}
#References
##Zar, J.H. 2010. Biostatistical Analysis, 5th ed. Pearson Prentice Hall: Upper Saddle River, NJ.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment