Skip to content

Instantly share code, notes, and snippets.

@ColCarroll
Last active August 3, 2017 22:34
Show Gist options
  • Save ColCarroll/5954870 to your computer and use it in GitHub Desktop.
Save ColCarroll/5954870 to your computer and use it in GitHub Desktop.
Graph clustering demo in R
sparse_matrix <- function(size, sparsity){
# Returns a <size> by <size> symmetric matrix
# with the desired sparsity percent
sparse <- matrix(
rbinom(size * size, 1, sparsity),
ncol = size,
nrow = size
)
sparse[lower.tri(sparse)] <- 0
sparse <- sparse + t(sparse)
return(sparse)
}
plot_graph <- function(network.g, filename){
la <- layout.auto(network.g)
png(filename, width = 1000, height = 1000)
plot(network.g, layout = la, vertex.size = 2, vertex.label = NA)
dev.off()
}
plot_graph_groups <- function(network.g, filename, groups){
la <- layout.auto(network.g)
png(filename, width = 1000, height = 1000)
plot(network.g, layout = la, vertex.size = 2, vertex.label = NA, mark.groups = groups)
dev.off()
}
plot_mat <- function(mat, filename){
# Convenience function to plot matrices as pixel images
png(filename, width = 500, height = 500)
image(
apply(mat, 2, rev), # flips matrix vertically, like we're used to seeing
col = c("white","black"),
xaxt = 'n',
yaxt = 'n'
)
dev.off()
}
sample_cluster <- function(n_clusters=10, n_nodes = 150, background = 0.05, cluster = 0.9){
# Builds a sample clustered matrix to use as a test data set
# <n_clusters> is the number of clusters in the matrix
# <n_nodes> is the number of nodes
# <background> is the background occurance of connections as a percent
# <cluster> is the intercluster connection percent
#Creates a list of list n_clusters+1 starting at 0 and ending at n_nodes,
#then diffs that to get a list of cluster sizes that will sum to n_nodes
require("igraph")
cluster_sizes <- diff(
c(
0,
sort(sample(1:n_nodes-1, n_clusters-1)),
n_nodes
)
)
cluster_sizes <- cluster_sizes[cluster_sizes > 0]
# A matrix with background noise
network <- sparse_matrix(n_nodes, background)
# Add dense graphs
start <- 1
end <- 0
for (i in 1:length(cluster_sizes)){
size <- cluster_sizes[i]
end <- end + size
network[start:end,start:end] <- sparse_matrix(size, cluster)
start <- start + size
}
diag(network) <- NA
plot_mat(network, "clustered_network.png")
shuffle <- sample(1:n_nodes, n_nodes)
network <- network[shuffle, shuffle] #shuffles the matrix
plot_mat(network, "shuffled_network.png")
network.g <- graph.adjacency(network, weighted = NULL,
mode = "undirected", diag = FALSE)
# plot the clustered graph
plot_graph(network.g, "ungrouped_graph.png")
return(list(network, network.g))
}
unshuffle <- function(network.m, network.g){
# Uses edge_betweenness to cluster the matrix. Works well
# at smaller sizes. Not as well for larger
edge_betweenness <- edge.betweenness.community(network.g)
print(edge_betweenness)
new_order <- order(edge_betweenness$membership)
new_network <- network.m[new_order, new_order]
plot_mat(new_network, "edge_betweenness_network.png")
groups <- list()
list_length =length(unique(edge_betweenness$membership))
for (i in 1:list_length){
groups[[i]] = which(edge_betweenness$membership == i)
}
plot_graph_groups(network.g, "grouped_graph.png", groups)
}
demo <- function(){
network <- sample_cluster()
unshuffle(network[[1]], network[[2]])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment