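# Gist by @dmarx (last active 2017-12-30).
# Benchmarks an exact per-vertex mean shortest-path computation against subsampled
# estimates, comparing serial (lapply) and parallel (foreach + doParallel)
# implementations on Barabasi-Albert (barabasi.game) graphs.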
# install.packages(c('igraph', 'foreach', 'doParallel'))  # uncomment on first run
library(igraph)
library(foreach)
library(doParallel)

# Spin up a local cluster with 8 workers and register it as the %dopar% backend.
# The cluster is shut down at the end of the script with stopCluster(cl).
cl <- makeCluster(8)
registerDoParallel(cl)
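# Optional (an assumption, not in the original gist): seed the workers' RNG streams
# so the parallel sampled estimators below are reproducible across runs.
# clusterSetRNGStream(cl, iseed = 42)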
# Exact version: for each vertex v, sum the finite shortest-path distances from v
# and divide by the total number of vertices N. Unreachable vertices contribute
# Inf * FALSE = NaN, which na.rm = TRUE drops.
sj_alg <- function(G){
  N <- gorder(G)
  lapply(V(G), function(v){
    q <- shortest.paths(G, v)
    rowSums(q * is.finite(q), na.rm = TRUE) / N
  })
}
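# Quick sanity check (illustrative only; `g_check` is a throwaway example graph,
# not part of the benchmark): on a connected graph, sj_alg should reproduce the
# row sums of the full distance matrix scaled by 1/N.
g_check <- make_ring(10)
stopifnot(isTRUE(all.equal(
  unname(unlist(sj_alg(g_check))),
  unname(rowSums(distances(g_check)) / gorder(g_check))
)))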
# Same computation, parallelized over source vertices with foreach/%dopar%.
# The igraph:: prefix matters: PSOCK workers start with a clean environment, so
# calls must be namespace-qualified (or the package exported via .packages).
sj_par <- function(G){
  N <- gorder(G)
  foreach(v = 1:N) %dopar% {
    q <- igraph::shortest.paths(G, v)
    rowSums(q * is.finite(q), na.rm = TRUE) / N
  }
}
# Sampled estimator: instead of computing distances from v to all N vertices,
# compute them to a random subsample of m targets (capped at max_samples) and
# average over m. Trades accuracy for speed.
sampled_est <- function(G, perc_subsample=.1, max_samples=1e3){
  N <- gorder(G)
  m <- floor(perc_subsample * N)
  m <- min(m, max_samples)
  lapply(V(G), function(v){
    q <- igraph::shortest.paths(G, v, sample(N, m))
    rowSums(q * is.finite(q), na.rm = TRUE) / m
  })
}
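# Illustrative accuracy check (not part of the benchmark; `g_demo` is a made-up
# example graph). With a 50% subsample the estimates should track the exact
# per-vertex values fairly closely on a small undirected preferential-attachment graph.
g_demo <- sample_pa(200, m = 2, directed = FALSE)
exact  <- unlist(sj_alg(g_demo))
approx <- unlist(sampled_est(g_demo, perc_subsample = 0.5))
summary(abs(exact - approx) / exact)   # per-vertex relative error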
# Sampled estimator, parallelized over source vertices.
sampled_est_par <- function(G, perc_subsample=.1, max_samples=1e3){
  N <- gorder(G)
  m <- floor(perc_subsample * N)
  m <- min(m, max_samples)
  foreach(v = 1:N) %dopar% {
    q <- igraph::shortest.paths(G, v, sample(N, m))
    rowSums(q * is.finite(q), na.rm = TRUE) / m
  }
}
# Benchmark harness: for each graph size in `vals`, build a Barabasi-Albert graph
# and time each method `reps` times. Result columns:
#   1: sj_par   2: sampled_est   3: sj_alg (exact)
#   4: sampled_est_par(.10)   5: sampled_est_par(.05)
#   6: mean relative error of the 10% sample   7: mean relative error of the 5% sample
timeit <- function(vals, reps=10, summary_method=mean){
  results <- matrix(NA, length(vals), 7)
  funcs <- list(sj_par, sampled_est, sj_alg)
  for(i in 1:length(vals)){
    print(vals[i])
    g <- barabasi.game(n=vals[i], m=2)
    results_i <- matrix(NA, reps, 7)
    for(j in 1:reps){
      for(k in 1:length(funcs)){
        results_i[j,k] <- system.time(test1 <- funcs[[k]](g))[3]
      }
      # Time the parallel sampled estimator at 10% and 5% subsampling; the
      # fraction is an argument to sampled_est_par, not to system.time().
      results_i[j,4] <- system.time(test2 <- sampled_est_par(g, .10))[3]
      results_i[j,5] <- system.time(test3 <- sampled_est_par(g, .05))[3]
      # Relative error of the sampled estimates against the exact result
      # (test1 holds the output of the last function in `funcs`, i.e. sj_alg).
      err_10p <- unlist(test1) - unlist(test2)
      err_5p  <- unlist(test1) - unlist(test3)
      results_i[j,6] <- mean(abs(err_10p)/unlist(test1))
      results_i[j,7] <- mean(abs(err_5p)/unlist(test1))
    }
    results[i,] <- apply(results_i, 2, summary_method)
  }
  results
}
n_nodes <- c(100,500,1000,3000,5000)
test <- timeit(n_nodes, reps=3, summary_method=min) # reporting "best of three"
# Timing curves for all five methods.
plot(0, 0, type='n',
     xlim=range(n_nodes),
     ylim=range(test[,1:5]),
     main='Computational efficiency',
     xlab='n_nodes', ylab='time (s)'
)
sapply(1:5, function(i) lines(n_nodes, test[,i], col=i))
legend('topleft', c('sj_par', 'sampled_est', 'sj_alg',
                    'sampled_est_par (.10)', 'sampled_est_par (.05)'),
       lty=1, col=1:5)

# Approximation error of the sampled estimators, as a percentage of the exact value.
plot(n_nodes, test[,7]*100, type='l', main='Sampling Error', ylab='Avg % Err')
lines(n_nodes, test[,6]*100, type='l', col=2)
legend('topright', c('5% sampling', '10% sampling'), lty=1, col=1:2)
# Percentage speedups: parallelism alone, sampling alone (with and without
# parallelism), and both combined.
par_imprv        <- 100*abs(test[,3] - test[,1])/test[,3]  # sj_alg vs sj_par
smpl_imprv_par   <- 100*abs(test[,4] - test[,1])/test[,1]  # sj_par vs sampled_est_par(.10)
smpl_imprv_nopar <- 100*abs(test[,2] - test[,3])/test[,3]  # sj_alg vs sampled_est
total_imprv      <- 100*abs(test[,4] - test[,3])/test[,3]  # sj_alg vs sampled_est_par(.10)
plot(n_nodes, par_imprv, type='l', main='% Improvement from parallelism')
plot(n_nodes, smpl_imprv_par, type='l', main='% Improvement from sampling (parallelized)')
plot(n_nodes, smpl_imprv_nopar, type='l', main='% Improvement from sampling (unparallelized)')
plot(n_nodes, total_imprv, type='l', main='% Total improvement')

# Shut down the parallel workers once the benchmark is done.
stopCluster(cl)