Last active
August 29, 2015 14:07
-
-
Save frederik-elwert/b785d84aa852c75392de to your computer and use it in GitHub Desktop.
HeForShe campaign analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(reshape2) | |
library(plyr) | |
library(dplyr) | |
library(RCurl) | |
library(RJSONIO) | |
library(stringr) | |
library(pander) | |
library(RColorBrewer) | |
library(rMaps) | |
# Read population data from Worldbank | |
# library(WDI) did not work in this case ... | |
data_raw <- read.csv("Gender_Data.csv") | |
data_raw <- droplevels(filter(data_raw, Series.Code != "")) | |
# I rewrote parts of the code while learning about modern R libraries | |
# like plyr, dplyr, reshape2. I will leave the old code in comments, | |
# as an example for others and a reminder for me. | |
# data = NULL | |
# for (var in levels(data_raw$Series.Code)) { | |
# data_new <- filter(data_raw, Series.Code == var) | |
# if (is.null(data)) { | |
# data <- select(data_new, Country.Name, Country.Code, YR2013) | |
# colnames(data) <- c("Country.Name", "Country.Code", var) | |
# } else { | |
# data_new <- select(data_new, Country.Code, YR2013) | |
# colnames(data_new) <- c("Country.Code", var) | |
# data <- inner_join(data, data_new, by="Country.Code") | |
# } | |
# } | |
data <- dcast(data_raw, Country.Name + Country.Code ~ Series.Code) | |
# Load live data with signers per country | |
raw = getURL("http://www.heforshe.org/signers.js") | |
raw <- str_extract(raw, "\\[.*\\]") | |
signers <- fromJSON(I(raw)) | |
# n = length(signers) | |
# | |
# signers_df <- data.frame(Country.Code = I(character(n)), | |
# Signers = integer(n)) | |
# for (i in 1:n) { | |
# signers_df[i, 1] <- signers[[i]][["id"]] | |
# signers_df[i, 2] <- as.integer(signers[[i]][["value"]]) | |
# } | |
signers_df <- ldply(signers, | |
function(x) data.frame(Country.Code = x[["id"]], | |
Signers = as.integer(x[["value"]]))) | |
#setdiff(data$Country.Code, signers_df$Country.Code) | |
# Some countries in WB have wrong/old country codes. Remap them. | |
# Signers Data missing in WB data | |
#misscodes <- setdiff(signers_df$Country.Code, data$Country.Code) | |
# [1] "AND" "TLS" "COD" "ROU" | |
# find them in ISO table | |
#data("ISO_3166_1") | |
#missnames <- filter(ISO_3166_1, Alpha_3 %in% misscodes)$Name | |
# Other name in WB | |
#missnames[2] <- "Congo, Dem. Rep." | |
# Their code in WB | |
#filter(data, Country.Name %in% missnames)$Country.Code | |
# [1] ADO ZAR ROM TMP | |
data$Country.Code <- revalue(data$Country.Code, | |
c("ADO"="AND", "ZAR"="COD", "ROM"="ROU", | |
"TMP"="TLS")) | |
data <- inner_join(data, signers_df, by = "Country.Code") | |
attach(data) | |
# male = total - female | |
male_total <- SP.POP.TOTL - SP.POP.TOTL.FE.IN | |
# under_15 = total * under_15 (%) | |
pop_under_15 <- SP.POP.TOTL * (SP.POP.0014.TO.ZS / 100) | |
# male_under_15 = under_15 - female_under_15 | |
male_under_15 <- pop_under_15 - SP.POP.0014.FE.IN | |
male_15_plus <- male_total - male_under_15 | |
perc_signers = Signers / male_15_plus | |
detach(data) | |
data <- mutate(data, Signers.Rel = perc_signers) | |
data <- filter(data, !is.na(Signers.Rel)) | |
attach(data) | |
# Countries by signers (absolute) | |
countries_abs <- Country.Name[order(Signers, decreasing = TRUE)] | |
pandoc.list(countries_abs[1:10]) | |
# Countries by signers (relative) | |
countries_rel <- Country.Name[order(Signers.Rel, decreasing = TRUE)] | |
pandoc.list(countries_rel[1:10]) | |
# Where is Germany now? | |
pos_germany <- which(countries_rel == 'Germany') | |
pos_germany | |
# Now plot | |
col <- rep("grey", length(Signers.Rel)) | |
col[c(1, pos_germany)] <- "#D80056" | |
barplot(sort(Signers.Rel, decreasing = TRUE)[1:50], col = col) | |
## Interactive map | |
## Simple version | |
# ichoropleth(Signers.Rel ~ Country.Code, data = data, | |
# pal = "PuRd", ncuts = 5, labels = FALSE, | |
# map = "world") | |
## Custom version | |
fillKey = cut(data$Signers.Rel, | |
quantile(data$Signers.Rel, seq(0, 1, 1/5)), | |
labels=LETTERS[1:5], include.lowest = TRUE) | |
levels(fillKey) <- LETTERS[1:6] | |
fillKey[which.max(data$Signers.Rel)] <- "F" | |
map_data <- data.frame(countryCode = data$Country.Code, | |
signersRel = data$Signers.Rel, | |
signersAbs = data$Signers, | |
fillKey = fillKey) | |
fills <- setNames( | |
c(colorRampPalette(c("white", "#D80056"))(7)[2:7], "gray"), | |
c(LETTERS[1:6], "defaultFill") | |
) | |
map <- Datamaps$new() | |
map$set( | |
scope = "world", | |
fills = fills, | |
data = dlply(map_data, "countryCode"), | |
legend = FALSE, | |
labels = FALSE, | |
geographyConfig = list( | |
highlightFillColor = "#DE3379", | |
popupTemplate = "#! function(geography, data) { | |
return '<div class=hoverinfo><strong>' + geography.properties.name + '</strong><br/>' + (data.signersRel * 100).toFixed(3) + '% (' + data.signersAbs + ' men)</div>'; | |
} !#" | |
) | |
) | |
map | |
map$save("heforshe.html", cdn = TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment