Last active
August 3, 2017 22:34
-
-
Save ColCarroll/5954870 to your computer and use it in GitHub Desktop.
Graph clustering demo in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sparse_matrix <- function(size, sparsity){ | |
# Returns a <size> by <size> symmetric matrix | |
# with the desired sparsity percent | |
sparse <- matrix( | |
rbinom(size * size, 1, sparsity), | |
ncol = size, | |
nrow = size | |
) | |
sparse[lower.tri(sparse)] <- 0 | |
sparse <- sparse + t(sparse) | |
return(sparse) | |
} | |
plot_graph <- function(network.g, filename){ | |
la <- layout.auto(network.g) | |
png(filename, width = 1000, height = 1000) | |
plot(network.g, layout = la, vertex.size = 2, vertex.label = NA) | |
dev.off() | |
} | |
plot_graph_groups <- function(network.g, filename, groups){ | |
la <- layout.auto(network.g) | |
png(filename, width = 1000, height = 1000) | |
plot(network.g, layout = la, vertex.size = 2, vertex.label = NA, mark.groups = groups) | |
dev.off() | |
} | |
plot_mat <- function(mat, filename){ | |
# Convenience function to plot matrices as pixel images | |
png(filename, width = 500, height = 500) | |
image( | |
apply(mat, 2, rev), # flips matrix vertically, like we're used to seeing | |
col = c("white","black"), | |
xaxt = 'n', | |
yaxt = 'n' | |
) | |
dev.off() | |
} | |
sample_cluster <- function(n_clusters=10, n_nodes = 150, background = 0.05, cluster = 0.9){ | |
# Builds a sample clustered matrix to use as a test data set | |
# <n_clusters> is the number of clusters in the matrix | |
# <n_nodes> is the number of nodes | |
# <background> is the background occurance of connections as a percent | |
# <cluster> is the intercluster connection percent | |
#Creates a list of list n_clusters+1 starting at 0 and ending at n_nodes, | |
#then diffs that to get a list of cluster sizes that will sum to n_nodes | |
require("igraph") | |
cluster_sizes <- diff( | |
c( | |
0, | |
sort(sample(1:n_nodes-1, n_clusters-1)), | |
n_nodes | |
) | |
) | |
cluster_sizes <- cluster_sizes[cluster_sizes > 0] | |
# A matrix with background noise | |
network <- sparse_matrix(n_nodes, background) | |
# Add dense graphs | |
start <- 1 | |
end <- 0 | |
for (i in 1:length(cluster_sizes)){ | |
size <- cluster_sizes[i] | |
end <- end + size | |
network[start:end,start:end] <- sparse_matrix(size, cluster) | |
start <- start + size | |
} | |
diag(network) <- NA | |
plot_mat(network, "clustered_network.png") | |
shuffle <- sample(1:n_nodes, n_nodes) | |
network <- network[shuffle, shuffle] #shuffles the matrix | |
plot_mat(network, "shuffled_network.png") | |
network.g <- graph.adjacency(network, weighted = NULL, | |
mode = "undirected", diag = FALSE) | |
# plot the clustered graph | |
plot_graph(network.g, "ungrouped_graph.png") | |
return(list(network, network.g)) | |
} | |
unshuffle <- function(network.m, network.g){ | |
# Uses edge_betweenness to cluster the matrix. Works well | |
# at smaller sizes. Not as well for larger | |
edge_betweenness <- edge.betweenness.community(network.g) | |
print(edge_betweenness) | |
new_order <- order(edge_betweenness$membership) | |
new_network <- network.m[new_order, new_order] | |
plot_mat(new_network, "edge_betweenness_network.png") | |
groups <- list() | |
list_length =length(unique(edge_betweenness$membership)) | |
for (i in 1:list_length){ | |
groups[[i]] = which(edge_betweenness$membership == i) | |
} | |
plot_graph_groups(network.g, "grouped_graph.png", groups) | |
} | |
demo <- function(){ | |
network <- sample_cluster() | |
unshuffle(network[[1]], network[[2]]) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment