Last active
March 4, 2018 10:22
-
-
Save PaulC91/462f133e4af11b8da796becfba149be6 to your computer and use it in GitHub Desktop.
Example of using a function to create a d3 Sankey diagram from multiple categorical variables plus one numerical value variable of a tidy data frame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(tidygraph) | |
library(igraph) | |
library(networkD3) | |
# Data source: https://www.kaggle.com/unitednations/refugee-data/data
asylum_seekers_raw <- read_csv(file = "asylum_seekers.csv")

# Top 10 countries of origin in 2016, ranked by summed 'Total decisions'
top_orig <- asylum_seekers_raw %>%
  filter(Year == 2016) %>%
  group_by(Origin) %>%
  summarise(total = sum(`Total decisions`, na.rm = TRUE)) %>%
  top_n(n = 10, wt = total) %>%
  # Origin is the first column of the summary, so this is the vector of names
  dplyr::pull(Origin)
# Destination (asylum) countries to keep in the diagram
euro_countries <- c(
  "Germany",
  "Sweden",
  "France",
  "United Kingdom",
  "Netherlands",
  "Switzerland",
  "Italy",
  "Belgium"
)
# Build the tidy table to visualise: 2016 rows for the chosen origins and
# destinations, reshaped so each decision type is its own row.
asylum_seekers <- asylum_seekers_raw %>%
  filter(
    Origin %in% top_orig,
    `Country / territory of asylum/residence` %in% euro_countries,
    Year == 2016
  ) %>%
  # Columns are picked by position; 8 and 10 are presumably the two
  # decision-count columns -- verify against the CSV header.
  select(1, 3, 2, 8, 10) %>%
  select(-Year) %>%
  drop_na() %>%
  # Stack the two remaining count columns into Decision (name) / Weight (count)
  gather(key = "Decision", value = "Weight", 3:4)
############################################## SANKEY FUNCTION ######################################
#' Draw a d3 Sankey diagram from a tidy data frame
#'
#' Every column of `data` other than `val_col` is treated as a categorical
#' stage, in column order. Each pair of neighbouring stages is turned into
#' `from -> to` links whose value is the sum of `val_col` over matching rows;
#' links are coloured by their target node.
#'
#' @param data A data frame with at least two categorical columns plus one
#'   numeric value column.
#' @param val_col Unquoted name of the numeric column summed into link values.
#' @param ... Currently unused.
#' @return The result of `sankeyNetwork()` built from the derived nodes and
#'   links.
sankey_func <- function(data, val_col, ...) {
  weight <- enquo(val_col)

  # Resolve the value column by name so it does not have to be the last column.
  value_name <- names(select(data, !! weight))
  stage_cols <- setdiff(names(data), value_name)
  if (length(stage_cols) < 2) {
    stop(
      "`data` must contain at least two categorical columns besides `val_col`.",
      call. = FALSE
    )
  }

  # Collapse the data frame into 3 columns: from, to, weight
  df <- seq_len(length(stage_cols) - 1) %>%
    map_df(function(i) {
      select(
        data,
        from = !! sym(stage_cols[i]),
        to = !! sym(stage_cols[i + 1]),
        !! weight
      )
    }) %>%
    drop_na() %>%
    group_by(from, to) %>%
    summarise(weight = sum(!! weight)) %>%
    ungroup() %>%
    mutate(colour = to)

  ig <- igraph::graph_from_data_frame(df) %>%
    as_tbl_graph()

  # sankeyNetwork() addresses nodes by zero-based index, hence the "- 1" shifts.
  nodes <- as_tibble(ig) %>%
    rowid_to_column("id") %>%
    mutate(id = id - 1) %>%
    as.data.frame()

  edges <- ig %>%
    activate(edges) %>%
    as_tibble() %>%
    mutate(from = from - 1, to = to - 1) %>%
    as.data.frame()

  sankeyNetwork(
    Links = edges, Nodes = nodes, Source = "from", Target = "to",
    NodeID = "name", Value = "weight", LinkGroup = "colour",
    fontSize = 14, fontFamily = "Roboto"
  )
}
########################################## END OF FUNCTION ##########################################
# Render the Sankey diagram for the cleaned asylum-seeker table
asylum_seekers %>%
  sankey_func(val_col = Weight)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment