Last active
June 29, 2020 16:46
-
-
Save tilltnet/620992d8c6c6d3f8863ee8030144e2dc to your computer and use it in GitHub Desktop.
This is an implementation and reproduction of the edge gravity alogrithm and some results presented in Helander, M. E., & McAllister, S. (2018). The gravity of an edge. Applied Network Science, 3(1). https://doi.org/10.1007/s41109-018-0063-6.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| find_possible_paths <- function(g, k = NULL, ksp = c("baerde", "yenpathy")) { | |
| undirected <- FALSE | |
| if (!igraph::is.directed(g)) { | |
| undirected <- TRUE | |
| weight <- igraph::E(g)$weight | |
| e_l <- as.data.frame(igraph::as_edgelist(g)) | |
| l_e <- as.data.frame(cbind(e_l[, 2], e_l[, 1])) | |
| e_l_l_e <- rbind(e_l, l_e) | |
| e_l_l_e$weight <- rep(weight, 2) | |
| g <- igraph::graph_from_data_frame(e_l_l_e) | |
| } | |
| if (is.null(igraph::V(g)$name)) { | |
| name_comb1 <- combn(igraph::V(g), 2) | |
| } else { | |
| name_comb1 <- combn(igraph::V(g)$name, 2) | |
| } | |
| name_comb2 <- rbind(name_comb1[2, ], | |
| name_comb1[1, ]) | |
| name_combs <- t(cbind(name_comb1, name_comb2)) | |
| pair_names <- paste(name_combs[, 1], | |
| name_combs[, 2]) | |
| all_possible_paths <- | |
| if (!is.null(k) & ksp[1] == "yenpathy") { | |
| furrr::future_map2_dfr(name_combs[, 1], | |
| name_combs[, 2], | |
| ~ path_list_to_df(yenpathy::k_shortest_paths(g, .x, .y, k = k), .x, .y)) | |
| } else if (!is.null(k) & ksp[1] == "baerde") { | |
| if(is.numeric(name_combs)) { | |
| g <- give_graph_names(g) | |
| name_combs <- | |
| cbind(paste0("v", name_combs[, 1]), | |
| paste0("v", name_combs[, 2])) | |
| } | |
| furrr::future_map2_dfr(name_combs[, 1], | |
| name_combs[, 2], | |
| ~ path_list_to_df(k_shortest_yen(g, .x, .y, k = k), .x, .y)) | |
| } else { | |
| furrr::future_map2_dfr(name_combs[, 1], | |
| name_combs[, 2], | |
| ~ path_list_to_df(purrr::map(igraph::all_simple_paths(g, .x, .y), as.character), .x, .y)) | |
| } | |
| #names(all_possible_paths) <- pair_names | |
| if (!is.null(k)) { | |
| if (k > max(purrr::map_dbl(all_possible_paths, length))) | |
| message( | |
| "Maximum number of paths found per node pair is smaller than k, all possible paths found." | |
| ) | |
| else | |
| message( | |
| "Maximum number of paths found per node pair equals k, there might be more possible paths. Try a higher number for k." | |
| ) | |
| } | |
| # res <- furrr::future_map_dfr(all_possible_paths, | |
| # ~ purrr::map_dfr(., | |
| # function(x) | |
| # data.frame(from = x[-length(x)], | |
| # to = x[-1]), .id = "path"), | |
| # .id = "node pair") | |
| if (undirected) { | |
| all_possible_paths <- dplyr::mutate(all_possible_paths, | |
| from_ = ifelse(from < to, from, to), | |
| to_ = ifelse(from > to, from, to)) | |
| dplyr::select(all_possible_paths, `node pair` = node.pair, path, from = from_, to = to_) | |
| } else { | |
| all_possible_paths | |
| } | |
| } | |
| count_edge_gravity <- function(g, k = NULL, ksp = c("baerde", "yenpathy")) { | |
| dplyr::count(find_possible_paths(g, k, ksp), | |
| from, to, sort = TRUE, name = "edge gravity") | |
| } | |
| find_all_paths_node_pair <- | |
| function(g, n1, n2, k) { | |
| a <- yenpathy::k_shortest_paths(g, n1, n2, k = k) | |
| path_list_to_df(a, n1, n2) | |
| } | |
| path_list_to_df <- function(l, n1, n2) { | |
| map_dfr(l, function(p) | |
| data.frame( | |
| `node pair` = paste(n1, n2), | |
| from = p[-length(p)], | |
| to = p[-1] | |
| ), .id = "path") | |
| } | |
| feed_chunks <- function(g, chunk, k) { | |
| map2_dfr(chunk$V1, | |
| chunk$V2, | |
| function(x, y) find_all_paths_node_pair(g, x, y, k)) | |
| } | |
| for_feed_chunks <- function(chunk, k) { | |
| res_tbl <- tibble() | |
| for(i in 1:nrow(chunk)) res_tbl <- bind_rows(find_all_paths_node_pair(g, chunk$V1[i], chunk$V2[i], k), res_tbl) | |
| res_tbl | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # libraries needed: | |
| # dplyr, | |
| # purrr, | |
| # furrr, | |
| # future (will be installed with furrr as a dependency) | |
| # igraph, | |
| # yenpathy (optional for limiting k) | |
| # ggplot2 | |
| # In order to avoid running out of memory due to the memory leaks that are | |
| # currently present in the c++ library used in yenpathy, use the | |
| # yenpathy fork in the install command below. | |
| # Using yenpathy now needs to be specified with the 'ksp' argument, see | |
| # example 4. yenpathy is much faster than the other solutions. | |
| # un-comment next two lines to install packages | |
| # install.packages("tidyverse", "furrr", "igraph") | |
| # remotes::install_github("tilltnet/yenpathy", build_vignettes = FALSE) | |
| # build_vignettes = TRUE fails when igraphdata not installed | |
| library(dplyr) | |
| library(ggplot2) | |
| library(igraph) | |
| library(tidyr) | |
| source("edge_gravity.R") | |
| # Example 1 -------------------------------------------------------------------- | |
| # Graph from yenpathy documentation. | |
| my_graph <- data.frame( | |
| source = c(1, 4, 5, 1, 1, 8, 1, 2, 7, 3), | |
| sink = c(4, 5, 6, 6, 8, 6, 2, 7, 3, 6), | |
| weight = c(1, 1, 1.5, 5, 1.5, 2.5, 1.5, 0.5, 0.5, 0.5) | |
| ) | |
| g1 <- graph_from_data_frame(my_graph) | |
| plot(g1) | |
| find_possible_paths(g1, 2) | |
| find_possible_paths(g1, 4) | |
| count_edge_gravity(g1) | |
| count_edge_gravity(g1, 4) | |
| # Example 2 -------------------------------------------------------------------- | |
| # From Figure 1 in Helander, M. E., & McAllister, S. (2018). The gravity | |
| # of an edge. Applied Network Science, 3(1). | |
| # https://doi.org/10.1007/s41109-018-0063-6 | |
| # This is an undirected graph, that means that before the k-shortest paths | |
| # can be found the network needs to be transformed so that each edge is duplicated | |
| # as a directed connecting the adjacent nodes in both directions. | |
| # The find_possible_paths() function takes care of that. | |
| el2 <- tribble( | |
| ~from, ~to, | |
| "1", "2", | |
| "1", "3", | |
| "2", "3", | |
| "2", "4", | |
| "3", "4", | |
| "3", "5") | |
| g2 <- graph_from_data_frame(el2, directed = FALSE) | |
| plot(g2) | |
| # This reproduces data in Table 2 and Fig. 2 in Helander & McAllister 2018. | |
| # 58 paths | |
| find_possible_paths(g2, 5) %>% | |
| count(`node pair`, path) %>% | |
| nrow() | |
| # Instead of rank this uses the actual values of EB and EG. | |
| g2_edge_gravity <- count_edge_gravity(g2, 5) | |
| g2_edge_gravity <- count_edge_gravity(g2) | |
| el2$`edge betweeness` <- edge_betweenness(g2) | |
| full_join(el2, g2_edge_gravity) %>% | |
| pivot_longer(c(`edge gravity`, `edge betweeness`)) %>% | |
| ggplot(aes(x = reorder(paste(from, to), value), y = value, fill = name)) + | |
| geom_bar(stat = "identity", position = "dodge") + | |
| coord_flip() + | |
| xlab(label = "edge") | |
| # Example 3 -------------------------------------------------------------------- | |
| # This reproduces data in Fig. 14 Helander & McAllister 2018 | |
| # Numbering of vertices is not the same though! | |
| library(netrankr) | |
| data("florentine_m") | |
| plot(florentine_m) | |
| find_possible_paths(florentine_m, ksp = "yenpathy") %>% | |
| count(`node pair`, path, sort = TRUE) %>% | |
| nrow() | |
| count_edge_gravity(g = florentine_m) | |
| count_edge_gravity(florentine_m, 34) | |
| count_edge_gravity(florentine_m, 34, ksp = "yenpathy") | |
| k_shortest_paths(graph_df = florentine_m, from = "Acciaiuol", to = "Ridolfi", k = 50) | |
| igraph::all_shortest_paths(florentine_m, "Acciaiuol") | |
| # Example 4 --------------------------------------------------------------- | |
| # "Bigger" network data, using parallel computing for finding paths! | |
| library(igraphdata) | |
| #data(package="igraphdata") | |
| data(UKfaculty) | |
| # Vertice/Edge Count | |
| length(V(UKfaculty)) | |
| length(E(UKfaculty)) | |
| library(future) | |
| # This is a memory saving, but rather slow solution. | |
| plan(multisession, gc = TRUE) | |
| start_time <- Sys.time() | |
| res4a <- count_edge_gravity(g = UKfaculty, k = 100) | |
| delta_t <- Sys.time() - start_time | |
| delta_t | |
| plan(sequential) | |
| # With k = 500 this takes (now!) 3 minutes on a Ryzen 9 3950X with 16 Cores and 32 Threads. | |
| # Memory usage seems fine when using yenpatyh fork from: | |
| plan(multisession, gc = TRUE) | |
| start_time <- Sys.time() | |
| res4b <- count_edge_gravity(g = UKfaculty, k = 500, ksp = "yenpathy") | |
| delta_t <- Sys.time() - start_time | |
| delta_t | |
| plan(sequential) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This code is taken from https://stackoverflow.com/a/51509886 | |
| # Changes: | |
| # - fixed bug, that would break code, when the actual k was smaller than the specified k - 1 | |
| # - Not deleting rootPath nodes (except for spurNode) caused this to create paths with loops | |
| # - when vertices are not named but rather numerically numbered paths are now returned correctly | |
| library(igraph) | |
| library(tidyverse) | |
| #'@return the shortest path as a list of vertices or NULL if there is no path between src and dest | |
| shortest_path <- function(graph, src, dest){ | |
| path <- suppressWarnings(get.shortest.paths(graph, src, dest)) | |
| if(is.numeric(src)) | |
| path <- as.numeric(path$vpath[[1]]) | |
| else | |
| path <- names(path$vpath[[1]]) | |
| if (length(path)==1) NULL else path | |
| } | |
| #'@return the sum of the weights of all the edges in the given path | |
| path_weight <- function(path, graph) sum(E(graph, path=path)$weight) | |
| #'@description sorts a list of paths based on the weight of the path | |
| sort_paths <- function(graph, paths) paths[sapply(paths, path_weight, graph) %>% order] | |
| #'@description creates a list of edges that should be deleted | |
| find_edges_to_delete <- function(A, i, rootPath){ | |
| edgesToDelete <- NULL | |
| for (p in A){ | |
| rootPath_p <- p[1:i] | |
| if (all(rootPath_p == rootPath)){ | |
| edge <- paste(p[i], ifelse(is.na(p[i+1]),p[i],p[i+1]), sep = '|') | |
| edgesToDelete[length(edgesToDelete)+1] <- edge | |
| } | |
| } | |
| unique(edgesToDelete) | |
| } | |
| give_graph_names <- function(graph) { | |
| V(graph)$name <- paste0("v", 1:length(V(graph))) | |
| graph | |
| } | |
| # returns the k shortest path from src to dest | |
| # sometimes it will return less than k shortest paths. This occurs when the max possible number of paths are less than k | |
| k_shortest_yen <- function(graph, src, dest, k){ | |
| if (src == dest) stop('src and dest can not be the same (currently)') | |
| # accepted paths | |
| A <- list(shortest_path(graph, src, dest)) | |
| if (k == 1) return (A) | |
| # potential paths | |
| B <- list() | |
| for (k_i in 2:k){ | |
| prev_path <- A[[k_i-1]] | |
| num_nodes_to_loop <- length(prev_path) - 1 | |
| for(i in 1:num_nodes_to_loop){ | |
| spurNode <- prev_path[i] | |
| rootPath <- prev_path[1:i] | |
| edgesToDelete <- find_edges_to_delete(A, i,rootPath) | |
| t_g <- delete.edges(graph, edgesToDelete) | |
| #!# | |
| t_g <- delete.vertices(t_g, rootPath[rootPath != spurNode]) | |
| spurPath <- shortest_path(t_g, spurNode, dest) | |
| #!# | |
| if (length(spurPath) > 0){ | |
| total_path <- list(c(rootPath[-i], spurPath)) | |
| if (!total_path %in% B) B[length(B)+1] <- total_path | |
| } | |
| } | |
| if (length(B) == 0) break | |
| B <- sort_paths(graph, B) | |
| A[k_i] <- B[1] | |
| B <- B[-1] | |
| } | |
| A | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment