Last active
December 30, 2017 06:56
-
-
Save dmarx/80b4d093bdcab2fff97ee0da2968084f to your computer and use it in GitHub Desktop.
Experimentation for response to: https://stackoverflow.com/questions/47387356/calculating-average-of-shortest-paths-for-each-vertex-for-a-3-million-node-graph
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| install.packages('doParallel') | |
| library(igraph) | |
| library(foreach) | |
| library(doParallel) | |
| stopCluster(cl) | |
| cl <- makeCluster(8) | |
| registerDoParallel(cl) | |
| sj_alg <- function(G){ | |
| N <- gorder(G) | |
| lapply(V(G), function(v){ | |
| q <- shortest.paths(G, v ) | |
| rowSums(q*is.finite(q),na.rm = TRUE)/N | |
| })} | |
| sj_par <- function(G){ | |
| N <- gorder(G) | |
| foreach(v=1:N) %dopar% { | |
| q <- igraph::shortest.paths(G, v ) | |
| rowSums(q*is.finite(q),na.rm = TRUE)/N | |
| }} | |
| sampled_est <- function(G, perc_subsample=.1, max_samples=1e3){ | |
| N <- gorder(G) | |
| m <- floor(perc_subsample*N) | |
| m <- ifelse(m>max_samples, max_samples, m) | |
| lapply(V(G), function(v){ | |
| q <- igraph::shortest.paths(G, v, sample(N, m)) | |
| rowSums(q*is.finite(q),na.rm = TRUE)/m | |
| }) | |
| } | |
| sampled_est_par <- function(G, perc_subsample=.1, max_samples=1e3){ | |
| N <- gorder(G) | |
| m <- floor(perc_subsample*N) | |
| m <- ifelse(m>max_samples, max_samples, m) | |
| foreach(v=1:N) %dopar% { | |
| q <- igraph::shortest.paths(G, v, sample(N, m)) | |
| rowSums(q*is.finite(q),na.rm = TRUE)/m | |
| } | |
| } | |
| timeit <- function(vals, reps=10, summary_method=mean){ | |
| results <- matrix(NA, length(vals), 7) | |
| funcs = list(sj_par, sampled_est, sj_alg) | |
| for(i in 1:length(vals)){ | |
| print(vals[i]) | |
| g <- barabasi.game(n=vals[i], m=2) | |
| results_i = matrix(NA, reps, 7) | |
| for(j in 1:reps){ | |
| for(k in 1:length(funcs)){ | |
| results_i[j,k] <- system.time(test1 <- funcs[[k]](g))[3] | |
| } | |
| results_i[j,4] <- system.time(test2 <- sampled_est_par(g), .10)[3] | |
| results_i[j,5] <- system.time(test3 <- sampled_est_par(g), .05)[3] | |
| err_10p <- unlist(test1) - unlist(test2) | |
| err_5p <- unlist(test1) - unlist(test3) | |
| results_i[j,6] <- mean(abs(err_10p)/unlist(test1)) | |
| results_i[j,7] <- mean(abs(err_5p)/unlist(test1)) | |
| } | |
| results[i,] = apply(results_i, 2, summary_method) | |
| } | |
| results | |
| } | |
| n_nodes <- c(100,500,1000,3000,5000) | |
| test <- timeit(n_nodes, reps=3, summary_method=min) # reporting "best of three" | |
| plot(0,0, type='n', | |
| xlim=range(n_nodes), | |
| ylim=range(test[,1:4]), | |
| main='Computational effeciency', | |
| xlab='n_nodes', ylab='time (s)' | |
| ) | |
| sapply(1:5, function(i) lines(n_nodes, test[,i], col=i)) | |
| legend('topleft', c('sj_par', 'sampled_est', 'sj_alg', | |
| 'sampled_est_par (.10)', 'sampled_est_par (.05)'), | |
| lty=1, col=1:5) | |
| plot(n_nodes, test[,7]*100, type='l', main='Sampling Error', ylab='Avg % Err') | |
| lines(n_nodes, test[,6]*100, type='l', col=2) | |
| legend('topright', c('5% sampling','10% sampling'), lty=1, col=1:2) | |
| par_imprv = 100*abs(test[,3] - test[,1])/test[,3] | |
| smpl_imprv_par = 100*abs(test[,4] - test[,1])/test[,1] | |
| smpl_imprv_nopar = 100*abs(test[,2] - test[,3])/test[,3] | |
| total_imprv = 100*abs(test[,4]-test[,3])/test[,3] | |
| plot(n_nodes, par_imprv, type='l', main='% Improvement from parallelism') | |
| plot(n_nodes, smpl_imprv_par, type='l', main='% Improvement from sampling (parallelized)') | |
| plot(n_nodes, smpl_imprv_nopar, type='l', main='% Improvement from sampling (unparallelized)') | |
| plot(n_nodes, total_imprv, type='l', main='% Total improvement') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment