Created
October 4, 2016 09:01
-
-
Save timcdlucas/cdb52b68c8c01968e438f9dc1871771d to your computer and use it in GitHub Desktop.
Extract homogenous subsets from tukeyHSD object.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(igraph) | |
set.seed(1) | |
# Make some data | |
x <- data.frame(y = c(rnorm(200), rnorm(200, 3), rnorm(200, 6)), | |
x = rep(letters[1:20], each = 30)) | |
# Quick plot | |
ggplot(x, aes(x = y, colour = x)) + geom_density() | |
# Run anova then tukey | |
x.aov <- aov(y ~ x, x) | |
tukey <- TukeyHSD(x.aov) | |
# set significance level | |
alpha <- 0.05 | |
# Find all homogenous pairs and get their names | |
nonsigpairs <- tukey$x[tukey$x[, 'p adj'] > alpha, ] | |
pairsnames <- strsplit(row.names(nonsigpairs), '-') | |
# Convert list into matrix of edges | |
pairsmatrix <- do.call(rbind, pairsnames) | |
# Convert to igraph object | |
graph <- graph_from_data_frame(pairsmatrix, directed = FALSE) | |
# We're looking for groups where all nodes are connected to all other nodes | |
plot(graph) | |
# List of igraph graphs | |
homosubsets <- max_cliques(graph) | |
# List of names | |
homosubsetnames <- lapply(homosubsets, names) | |
# So we have 4 groups. Two groups are the obviously the clusters you see in the plot | |
# But the bcafd group is split into two homogenous subsets. | |
# We can see that e and g are significantly different, which therefore gives two, largely overlapping homogenous subsets. | |
pairsmatrix[apply(pairsmatrix, 1, function(x) 'e' %in% x), ] | |
# I'll bundle into a function. Feel free to use without attributation etc. | |
# x should be a tukeyHSD object | |
# alpha is the significance level | |
# plot is a logical whether to plot the network of nonsignifican pairs. | |
homogenous.subsets <- function(x, alpha = 0.05, plot = FALSE){ | |
if(!require(igraph)) stop('Please install the igraph package') | |
if(!inherits(x, 'TukeyHSD')) stop('x should be a TukeyHSD object') | |
if(alpha < 0 | alpha > 1) stop('alpha should be in interval [0, 1]') | |
if(length(alpha) > 1 | !is.numeric(alpha)) stop('alpha should be a length 1 numeric') | |
# Find all homogenous pairs and get their names | |
nonsigpairs <- x$x[x$x[, 'p adj'] > alpha, ] | |
pairsnames <- strsplit(row.names(nonsigpairs), '-') | |
# Convert list into matrix of edges | |
pairsmatrix <- do.call(rbind, pairsnames) | |
# Convert to igraph object | |
graph <- graph_from_data_frame(pairsmatrix, directed = FALSE) | |
if(plot){ | |
# We're looking for groups where all nodes are connected to all other nodes | |
plot(graph) | |
} | |
# List of igraph graphs | |
homosubsets <- max_cliques(graph) | |
# List of names | |
homosubsetnames <- lapply(homosubsets, names) | |
return(homosubsetnames) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment