Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save crazyhottommy/d40c4412d73254eb98bf63352b79966b to your computer and use it in GitHub Desktop.
Save crazyhottommy/d40c4412d73254eb98bf63352b79966b to your computer and use it in GitHub Desktop.
Demo of going from an igraph dendrogram to a simplified view as a ggraph circlepack as in https://twitter.com/clairemcwhite/status/1079895521446293505
library(tidyverse)
library(ggraph)
library(dendextend)
library(igraph)
# Cut a dendrogram at one absolute height and tabulate cluster membership.
#
# dendrogram: tree object handed to cutree().
# height:     absolute height passed to cutree() as `h`.
# c:          value used only to label the membership column; it is rounded
#             to 2 decimals and appended to "cut_" (e.g. "cut_0.6").
#
# Returns a two-column tibble: the cluster number of every leaf under the
# "cut_<c>" column, plus an "ID" column taken from the row names of the
# data-frame form of the cutree() result.
cut_df <- function(dendrogram, height, c) {
  membership <- as.data.frame(cutree(dendrogram, h = height))
  # Row names are dropped by as_tibble(), so copy them into a column first
  membership$ID <- row.names(membership)
  cluster_col <- paste("cut", as.character(round(c, 2)), sep = "_")
  setNames(as_tibble(membership), c(cluster_col, "ID"))
}
# Cut a dendrogram at several relative heights and combine the results.
#
# dendrogram: tree object passed on to cut_df().
# cuts:       values multiplied by the maximum node height of the tree to get
#             the absolute cutting heights (so 1 corresponds to the tallest
#             node); each value also labels its column via cut_df().
#
# Returns the result of successively merge()-ing (all = TRUE) the per-cut
# tables onto an initially empty data frame holding only an "ID" column.
cut_dend <- function(dendrogram, cuts) {
  tallest <- max(get_nodes_attr(dendrogram, "height"))
  # Fold each per-cut table into the accumulated table, starting from an
  # empty frame so that zero cuts still yields an "ID" column
  Reduce(
    function(merged, fraction) {
      merge(merged, cut_df(dendrogram, fraction * tallest, fraction), all = TRUE)
    },
    cuts,
    data.frame(ID = character())
  )
}
# The point of this code is to threshold a dendrogram at various points, and
# plot clusters as subsets of their parent cluster.
# Cluster on the four numeric measurements only: dist() works on a numeric
# matrix, so passing the whole iris data frame would coerce the Species factor
# column to NA (with an "NAs introduced by coercion" warning).
d_iris <- as.dendrogram(hclust(dist(iris[, 1:4])))
# leaflab = "none" suppresses the leaf labels; vertex.label is an igraph
# plotting argument that plot() on a dendrogram does not understand.
plot(d_iris, leaflab = "none")
# Choose points up the dendrogram at which to cut into clusters,
# 1.0 is stem and 0.0 is tips.
# In this iris dendrogram, 0.6 cuts into two clusters, 0.4 into 4 clusters,
# and 0.2 into 12 clusters
cut_clusters <- cut_dend(d_iris, c(0.6, 0.4, 0.2))
# Each cluster needs a unique ID, not a plain number.
# Borrow a unique identifier for each column from its column header: the
# cluster number is pasted in front of the column name (e.g. "1cut_0.6").
# All nodes also need to be connected back to a single origin point; it can be
# called anything, just not the same thing as the other cluster ids.
clusters_uniqued <- cut_clusters %>%
  # Long form: one row per (ID, cut column) pair
  pivot_longer(cols = -ID, names_to = "clusterset", values_to = "clusternum") %>%
  mutate(clusterid = paste0(clusternum, clusterset)) %>%
  select(-clusternum) %>%
  # Back to wide form: one column per cut, now holding the unique cluster ids
  pivot_wider(names_from = clusterset, values_from = clusterid) %>%
  mutate(link = "origin")
# Create edges in "from", "to" format: one parent -> child table per level,
# de-duplicated so each parent/child pair appears once.
# https://www.r-graph-gallery.com/334-basic-dendrogram-with-ggraph/
edges_level0_1 <- clusters_uniqued %>%
  select(from = link, to = cut_0.6) %>%
  unique()
edges_level1_2 <- clusters_uniqued %>%
  select(from = cut_0.6, to = cut_0.4) %>%
  unique()
edges_level2_3 <- clusters_uniqued %>%
  select(from = cut_0.4, to = cut_0.2) %>%
  unique()
# The final level is the individual tips of the dendrogram as the smallest
# dots in the circlepack
edges_level3_final <- clusters_uniqued %>%
  select(from = cut_0.2, to = ID) %>%
  unique()
edge_list <- rbind(
  edges_level0_1,
  edges_level1_2,
  edges_level2_3,
  edges_level3_final
)
mygraph <- graph_from_data_frame(edge_list)
# If you want to hide the first large circle see:
# https://www.r-graph-gallery.com/315-hide-first-level-in-circle-packing/
# Draw every node as a circle nested inside its parent, filled by depth.
# "none" is the documented legend.position value for hiding the legend.
ggraph(mygraph, layout = "circlepack") +
  geom_node_circle(aes(fill = depth)) +
  theme_void() +
  theme(legend.position = "none") +
  NULL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment