Forked from clairemcwhite/simplified_dendrogram_to_circlepack.R
Created
January 2, 2019 03:19
-
-
Save crazyhottommy/d40c4412d73254eb98bf63352b79966b to your computer and use it in GitHub Desktop.
Demo of going from an igraph dendrogram to a simplified view as a ggraph circlepack as in https://twitter.com/clairemcwhite/status/1079895521446293505
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(ggraph) | |
library(dendextend) | |
library(igraph) | |
cut_df <- function(dendrogram, height, c) {
  # Cut `dendrogram` at `height` and report which cluster each leaf lands in.
  #
  # Args:
  #   dendrogram: tree handed to cutree().
  #   height:     height at which the tree is cut (passed as `h`).
  #   c:          number used only to label the result column; it is rounded
  #               to 2 decimal places and prefixed with "cut_".
  #
  # Returns a tibble with two columns: "cut_<c>" (cluster number) and "ID"
  # (taken from the row names of the cutree() result).
  membership <- as.data.frame(cutree(dendrogram, h = height))
  membership$ID <- row.names(membership)
  column_label <- paste0("cut_", round(c, 2))
  # The argument `c` is numeric, so the call c(...) below still resolves to
  # the base function: R skips non-function bindings when looking up a call.
  setNames(as_tibble(membership), c(column_label, "ID"))
}
cut_dend <- function(dendrogram, cuts) {
  # Cut `dendrogram` at several relative heights and collect the results.
  #
  # Args:
  #   dendrogram: tree to cut.
  #   cuts:       multipliers applied to the largest node height of the tree;
  #               each one yields one cut_df() table.
  #
  # Returns a data frame with an "ID" column plus the cluster column produced
  # by cut_df() for every entry of `cuts`.
  tallest <- max(get_nodes_attr(dendrogram, "height"))
  add_cut <- function(so_far, fraction) {
    merge(so_far, cut_df(dendrogram, fraction * tallest, fraction), all = TRUE)
  }
  # Seed the fold with an empty ID-only frame; the full (all = TRUE) merge
  # then keeps every row contributed by each successive cut.
  Reduce(add_cut, cuts, data.frame(ID = character(0)))
}
# The point of this code is to threshold a dendrogram at various points, and
# plot clusters as subsets of their parent cluster.

# Cluster on the four numeric measurements only: Species is a factor and has
# no place in a Euclidean distance.
d_iris <- as.dendrogram(hclust(dist(iris[, 1:4])))
# leaflab = "none" hides the leaf labels; vertex.label is an igraph argument
# that the dendrogram plot method does not understand.
plot(d_iris, leaflab = "none")

# Choose points up the dendrogram at which to cut into clusters, listed from
# coarsest to finest: 1.0 is stem and 0.0 is tips.
# In this iris dendrogram, 0.6 cuts into two clusters, 0.4 into 4 clusters,
# and 0.2 into 12 clusters.
cut_clusters <- cut_dend(d_iris, c(0.6, 0.4, 0.2))

# One column per cut, in the order the cuts were requested (merge() puts the
# shared ID column first and appends each new cut column after the others).
cut_levels <- setdiff(names(cut_clusters), "ID")

# Each cluster needs a unique ID, not a plain number.
# Borrow a unique suffix for each column from its column header,
# e.g. cluster 1 of column cut_0.6 becomes "1cut_0.6".
clusters_uniqued <- cut_clusters %>%
  mutate(across(all_of(cut_levels), ~ paste0(.x, cur_column())))

# All nodes need to be connected back to a single origin point.
# This can be called anything, just not the same thing as the other cluster ids.
clusters_uniqued$link <- "origin"

# Create edges in "from", "to" format, one set per pair of adjacent levels:
# origin -> coarsest cut -> ... -> finest cut -> individual tips.
# The tips end up as the smallest dots in the circlepack.
# https://www.r-graph-gallery.com/334-basic-dendrogram-with-ggraph/
hierarchy <- c("link", cut_levels, "ID")
edge_list <- bind_rows(map2(
  head(hierarchy, -1),
  tail(hierarchy, -1),
  function(parent, child) {
    clusters_uniqued %>%
      transmute(from = .data[[parent]], to = .data[[child]]) %>%
      distinct()
  }
))

mygraph <- graph_from_data_frame(edge_list)

# If you want to hide the first large circle see:
# https://www.r-graph-gallery.com/315-hide-first-level-in-circle-packing/
ggraph(mygraph, layout = "circlepack") +
  geom_node_circle(aes(fill = depth)) +
  theme_void() +
  # "none" is the documented value for suppressing the legend.
  theme(legend.position = "none")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment