Skip to content

Instantly share code, notes, and snippets.

@timcdlucas
Created October 4, 2016 09:01
Show Gist options
  • Save timcdlucas/cdb52b68c8c01968e438f9dc1871771d to your computer and use it in GitHub Desktop.
Save timcdlucas/cdb52b68c8c01968e438f9dc1871771d to your computer and use it in GitHub Desktop.
Extract homogenous subsets from tukeyHSD object.
library(ggplot2)
library(igraph)
set.seed(1)
# Make some data
x <- data.frame(y = c(rnorm(200), rnorm(200, 3), rnorm(200, 6)),
x = rep(letters[1:20], each = 30))
# Quick plot
ggplot(x, aes(x = y, colour = x)) + geom_density()
# Run anova then tukey
x.aov <- aov(y ~ x, x)
tukey <- TukeyHSD(x.aov)
# set significance level
alpha <- 0.05
# Find all homogenous pairs and get their names
nonsigpairs <- tukey$x[tukey$x[, 'p adj'] > alpha, ]
pairsnames <- strsplit(row.names(nonsigpairs), '-')
# Convert list into matrix of edges
pairsmatrix <- do.call(rbind, pairsnames)
# Convert to igraph object
graph <- graph_from_data_frame(pairsmatrix, directed = FALSE)
# We're looking for groups where all nodes are connected to all other nodes
plot(graph)
# List of igraph graphs
homosubsets <- max_cliques(graph)
# List of names
homosubsetnames <- lapply(homosubsets, names)
# So we have 4 groups. Two groups are the obviously the clusters you see in the plot
# But the bcafd group is split into two homogenous subsets.
# We can see that e and g are significantly different, which therefore gives two, largely overlapping homogenous subsets.
pairsmatrix[apply(pairsmatrix, 1, function(x) 'e' %in% x), ]
# I'll bundle into a function. Feel free to use without attributation etc.
# x should be a tukeyHSD object
# alpha is the significance level
# plot is a logical whether to plot the network of nonsignifican pairs.
homogenous.subsets <- function(x, alpha = 0.05, plot = FALSE){
if(!require(igraph)) stop('Please install the igraph package')
if(!inherits(x, 'TukeyHSD')) stop('x should be a TukeyHSD object')
if(alpha < 0 | alpha > 1) stop('alpha should be in interval [0, 1]')
if(length(alpha) > 1 | !is.numeric(alpha)) stop('alpha should be a length 1 numeric')
# Find all homogenous pairs and get their names
nonsigpairs <- x$x[x$x[, 'p adj'] > alpha, ]
pairsnames <- strsplit(row.names(nonsigpairs), '-')
# Convert list into matrix of edges
pairsmatrix <- do.call(rbind, pairsnames)
# Convert to igraph object
graph <- graph_from_data_frame(pairsmatrix, directed = FALSE)
if(plot){
# We're looking for groups where all nodes are connected to all other nodes
plot(graph)
}
# List of igraph graphs
homosubsets <- max_cliques(graph)
# List of names
homosubsetnames <- lapply(homosubsets, names)
return(homosubsetnames)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment