Skip to content

Instantly share code, notes, and snippets.

@tilltnet
Last active June 29, 2020 16:46
Show Gist options
  • Save tilltnet/620992d8c6c6d3f8863ee8030144e2dc to your computer and use it in GitHub Desktop.
Save tilltnet/620992d8c6c6d3f8863ee8030144e2dc to your computer and use it in GitHub Desktop.
This is an implementation and reproduction of the edge gravity alogrithm and some results presented in Helander, M. E., & McAllister, S. (2018). The gravity of an edge. Applied Network Science, 3(1). https://doi.org/10.1007/s41109-018-0063-6.
find_possible_paths <- function(g, k = NULL, ksp = c("baerde", "yenpathy")) {
undirected <- FALSE
if (!igraph::is.directed(g)) {
undirected <- TRUE
weight <- igraph::E(g)$weight
e_l <- as.data.frame(igraph::as_edgelist(g))
l_e <- as.data.frame(cbind(e_l[, 2], e_l[, 1]))
e_l_l_e <- rbind(e_l, l_e)
e_l_l_e$weight <- rep(weight, 2)
g <- igraph::graph_from_data_frame(e_l_l_e)
}
if (is.null(igraph::V(g)$name)) {
name_comb1 <- combn(igraph::V(g), 2)
} else {
name_comb1 <- combn(igraph::V(g)$name, 2)
}
name_comb2 <- rbind(name_comb1[2, ],
name_comb1[1, ])
name_combs <- t(cbind(name_comb1, name_comb2))
pair_names <- paste(name_combs[, 1],
name_combs[, 2])
all_possible_paths <-
if (!is.null(k) & ksp[1] == "yenpathy") {
furrr::future_map2_dfr(name_combs[, 1],
name_combs[, 2],
~ path_list_to_df(yenpathy::k_shortest_paths(g, .x, .y, k = k), .x, .y))
} else if (!is.null(k) & ksp[1] == "baerde") {
if(is.numeric(name_combs)) {
g <- give_graph_names(g)
name_combs <-
cbind(paste0("v", name_combs[, 1]),
paste0("v", name_combs[, 2]))
}
furrr::future_map2_dfr(name_combs[, 1],
name_combs[, 2],
~ path_list_to_df(k_shortest_yen(g, .x, .y, k = k), .x, .y))
} else {
furrr::future_map2_dfr(name_combs[, 1],
name_combs[, 2],
~ path_list_to_df(purrr::map(igraph::all_simple_paths(g, .x, .y), as.character), .x, .y))
}
#names(all_possible_paths) <- pair_names
if (!is.null(k)) {
if (k > max(purrr::map_dbl(all_possible_paths, length)))
message(
"Maximum number of paths found per node pair is smaller than k, all possible paths found."
)
else
message(
"Maximum number of paths found per node pair equals k, there might be more possible paths. Try a higher number for k."
)
}
# res <- furrr::future_map_dfr(all_possible_paths,
# ~ purrr::map_dfr(.,
# function(x)
# data.frame(from = x[-length(x)],
# to = x[-1]), .id = "path"),
# .id = "node pair")
if (undirected) {
all_possible_paths <- dplyr::mutate(all_possible_paths,
from_ = ifelse(from < to, from, to),
to_ = ifelse(from > to, from, to))
dplyr::select(all_possible_paths, `node pair` = node.pair, path, from = from_, to = to_)
} else {
all_possible_paths
}
}
count_edge_gravity <- function(g, k = NULL, ksp = c("baerde", "yenpathy")) {
dplyr::count(find_possible_paths(g, k, ksp),
from, to, sort = TRUE, name = "edge gravity")
}
find_all_paths_node_pair <-
function(g, n1, n2, k) {
a <- yenpathy::k_shortest_paths(g, n1, n2, k = k)
path_list_to_df(a, n1, n2)
}
path_list_to_df <- function(l, n1, n2) {
map_dfr(l, function(p)
data.frame(
`node pair` = paste(n1, n2),
from = p[-length(p)],
to = p[-1]
), .id = "path")
}
feed_chunks <- function(g, chunk, k) {
map2_dfr(chunk$V1,
chunk$V2,
function(x, y) find_all_paths_node_pair(g, x, y, k))
}
for_feed_chunks <- function(chunk, k) {
res_tbl <- tibble()
for(i in 1:nrow(chunk)) res_tbl <- bind_rows(find_all_paths_node_pair(g, chunk$V1[i], chunk$V2[i], k), res_tbl)
res_tbl
}
# libraries needed:
# dplyr,
# purrr,
# furrr,
# future (will be installed with furrr as a dependency)
# igraph,
# yenpathy (optional for limiting k)
# ggplot2
# In order to avoid running out of memory due to the memory leaks that are
# currently present in the c++ library used in yenpathy, use the
# yenpathy fork in the install command below.
# Using yenpathy now needs to be specified with the 'ksp' argument, see
# example 4. yenpathy is much faster than the other solutions.
# un-comment next two lines to install packages
# install.packages("tidyverse", "furrr", "igraph")
# remotes::install_github("tilltnet/yenpathy", build_vignettes = FALSE)
# build_vignettes = TRUE fails when igraphdata not installed
library(dplyr)
library(ggplot2)
library(igraph)
library(tidyr)
source("edge_gravity.R")
# Example 1 --------------------------------------------------------------------
# Graph from yenpathy documentation.
my_graph <- data.frame(
source = c(1, 4, 5, 1, 1, 8, 1, 2, 7, 3),
sink = c(4, 5, 6, 6, 8, 6, 2, 7, 3, 6),
weight = c(1, 1, 1.5, 5, 1.5, 2.5, 1.5, 0.5, 0.5, 0.5)
)
g1 <- graph_from_data_frame(my_graph)
plot(g1)
find_possible_paths(g1, 2)
find_possible_paths(g1, 4)
count_edge_gravity(g1)
count_edge_gravity(g1, 4)
# Example 2 --------------------------------------------------------------------
# From Figure 1 in Helander, M. E., & McAllister, S. (2018). The gravity
# of an edge. Applied Network Science, 3(1).
# https://doi.org/10.1007/s41109-018-0063-6
# This is an undirected graph, that means that before the k-shortest paths
# can be found the network needs to be transformed so that each edge is duplicated
# as a directed connecting the adjacent nodes in both directions.
# The find_possible_paths() function takes care of that.
el2 <- tribble(
~from, ~to,
"1", "2",
"1", "3",
"2", "3",
"2", "4",
"3", "4",
"3", "5")
g2 <- graph_from_data_frame(el2, directed = FALSE)
plot(g2)
# This reproduces data in Table 2 and Fig. 2 in Helander & McAllister 2018.
# 58 paths
find_possible_paths(g2, 5) %>%
count(`node pair`, path) %>%
nrow()
# Instead of rank this uses the actual values of EB and EG.
g2_edge_gravity <- count_edge_gravity(g2, 5)
g2_edge_gravity <- count_edge_gravity(g2)
el2$`edge betweeness` <- edge_betweenness(g2)
full_join(el2, g2_edge_gravity) %>%
pivot_longer(c(`edge gravity`, `edge betweeness`)) %>%
ggplot(aes(x = reorder(paste(from, to), value), y = value, fill = name)) +
geom_bar(stat = "identity", position = "dodge") +
coord_flip() +
xlab(label = "edge")
# Example 3 --------------------------------------------------------------------
# This reproduces data in Fig. 14 Helander & McAllister 2018
# Numbering of vertices is not the same though!
library(netrankr)
data("florentine_m")
plot(florentine_m)
find_possible_paths(florentine_m, ksp = "yenpathy") %>%
count(`node pair`, path, sort = TRUE) %>%
nrow()
count_edge_gravity(g = florentine_m)
count_edge_gravity(florentine_m, 34)
count_edge_gravity(florentine_m, 34, ksp = "yenpathy")
k_shortest_paths(graph_df = florentine_m, from = "Acciaiuol", to = "Ridolfi", k = 50)
igraph::all_shortest_paths(florentine_m, "Acciaiuol")
# Example 4 ---------------------------------------------------------------
# "Bigger" network data, using parallel computing for finding paths!
library(igraphdata)
#data(package="igraphdata")
data(UKfaculty)
# Vertice/Edge Count
length(V(UKfaculty))
length(E(UKfaculty))
library(future)
# This is a memory saving, but rather slow solution.
plan(multisession, gc = TRUE)
start_time <- Sys.time()
res4a <- count_edge_gravity(g = UKfaculty, k = 100)
delta_t <- Sys.time() - start_time
delta_t
plan(sequential)
# With k = 500 this takes (now!) 3 minutes on a Ryzen 9 3950X with 16 Cores and 32 Threads.
# Memory usage seems fine when using yenpatyh fork from:
plan(multisession, gc = TRUE)
start_time <- Sys.time()
res4b <- count_edge_gravity(g = UKfaculty, k = 500, ksp = "yenpathy")
delta_t <- Sys.time() - start_time
delta_t
plan(sequential)
# This code is taken from https://stackoverflow.com/a/51509886
# Changes:
# - fixed bug, that would break code, when the actual k was smaller than the specified k - 1
# - Not deleting rootPath nodes (except for spurNode) caused this to create paths with loops
# - when vertices are not named but rather numerically numbered paths are now returned correctly
library(igraph)
library(tidyverse)
#'@return the shortest path as a list of vertices or NULL if there is no path between src and dest
shortest_path <- function(graph, src, dest){
path <- suppressWarnings(get.shortest.paths(graph, src, dest))
if(is.numeric(src))
path <- as.numeric(path$vpath[[1]])
else
path <- names(path$vpath[[1]])
if (length(path)==1) NULL else path
}
#'@return the sum of the weights of all the edges in the given path
path_weight <- function(path, graph) sum(E(graph, path=path)$weight)
#'@description sorts a list of paths based on the weight of the path
sort_paths <- function(graph, paths) paths[sapply(paths, path_weight, graph) %>% order]
#'@description creates a list of edges that should be deleted
find_edges_to_delete <- function(A, i, rootPath){
edgesToDelete <- NULL
for (p in A){
rootPath_p <- p[1:i]
if (all(rootPath_p == rootPath)){
edge <- paste(p[i], ifelse(is.na(p[i+1]),p[i],p[i+1]), sep = '|')
edgesToDelete[length(edgesToDelete)+1] <- edge
}
}
unique(edgesToDelete)
}
give_graph_names <- function(graph) {
V(graph)$name <- paste0("v", 1:length(V(graph)))
graph
}
# returns the k shortest path from src to dest
# sometimes it will return less than k shortest paths. This occurs when the max possible number of paths are less than k
k_shortest_yen <- function(graph, src, dest, k){
if (src == dest) stop('src and dest can not be the same (currently)')
# accepted paths
A <- list(shortest_path(graph, src, dest))
if (k == 1) return (A)
# potential paths
B <- list()
for (k_i in 2:k){
prev_path <- A[[k_i-1]]
num_nodes_to_loop <- length(prev_path) - 1
for(i in 1:num_nodes_to_loop){
spurNode <- prev_path[i]
rootPath <- prev_path[1:i]
edgesToDelete <- find_edges_to_delete(A, i,rootPath)
t_g <- delete.edges(graph, edgesToDelete)
#!#
t_g <- delete.vertices(t_g, rootPath[rootPath != spurNode])
spurPath <- shortest_path(t_g, spurNode, dest)
#!#
if (length(spurPath) > 0){
total_path <- list(c(rootPath[-i], spurPath))
if (!total_path %in% B) B[length(B)+1] <- total_path
}
}
if (length(B) == 0) break
B <- sort_paths(graph, B)
A[k_i] <- B[1]
B <- B[-1]
}
A
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment