Last active
January 24, 2021 08:57
-
-
Save roblanf/241978afe4237b3b605f to your computer and use it in GitHub Desktop.
code to make two basic plots showing gender balance in an institution. An example dataset is here: https://gist.github.com/roblanf/f0f9e331adc5aae84fb1. Full description here: www.robertlanfear.com/blog/files/visualising_gender_balance_R.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(reshape2) | |
library(plyr) | |
# Load the raw records. The code below reads three columns from this table:
# `role`, `gender` (values "woman" / "man") and `year`.
all <- read.csv("genderdata.csv")

######################## Plot 1 ###############################################
# The raw data: number of men and women in each role, by year

# Roles in the order they should appear along the axis. Turning `role` into a
# factor with these levels is what fixes the plotting order.
all_roles <- c(
  "Technical Staff", "Professional Staff", "Faculty Staff",
  "MSc student completion", "PhD student completion", "Postdoc",
  "Associate Lecturer", "Lecturer", "Senior Lecturer", "Associate Professor",
  "Professor", "Distinguished Professor", "Emeritus Professor"
)
all$role <- factor(all$role, levels = all_roles)

# Numeric version of the role order. The plot below no longer needs it (it
# plots the ordered factor directly), but the column is kept so that `all`
# has the same columns as before.
all$order <- as.numeric(all$role) / 100

# Axis ticks for the head counts; men are drawn on the negative side, so the
# labels show absolute values.
count_breaks <- seq(-40, 40, 10)

# Stacked pyramid plot, thanks to Didzis Elferts:
# http://stackoverflow.com/questions/14680075/simpler-population-pyramid-in-ggplot2
#
# Each layer gets its own pre-filtered data: the layer-level `subset = .()`
# argument used by older ggplot2/plyr code is not supported by current
# ggplot2. `%in%` is used for the filter so rows with a missing gender are
# dropped instead of turning into all-NA rows.
# `after_stat(count)` replaces the deprecated `..count..` notation
# (requires ggplot2 >= 3.3.0).
# Plotting `role` itself with `drop = FALSE` keeps every role on the axis with
# its own name, even if a role has no rows in the data; relabelling
# `factor(order)` with `all_roles` would mislabel or fail in that case.
ggplot(data = all, aes(x = role, fill = gender)) +
  geom_bar(data = all[all$gender %in% "woman", ]) +
  geom_bar(
    data = all[all$gender %in% "man", ],
    aes(y = after_stat(count) * (-1))
  ) +
  scale_x_discrete(drop = FALSE) +
  xlab("role") +
  coord_flip() +
  theme(text = element_text(size = 16)) +
  scale_y_continuous(breaks = count_breaks, labels = abs(count_breaks)) +
  scale_fill_brewer(palette = "Dark2") +
  facet_wrap(~year)
######################## Plot 2 ###############################################
# Yearly proportions of women in staff, students/postdocs, faculty

# Define some groups of roles to summarise the data.
staff <- c("Technical Staff", "Professional Staff", "Faculty Staff")
s.p <- c("MSc student completion", "PhD student completion", "Postdoc")
faculty <- c(
  "Associate Lecturer", "Lecturer", "Senior Lecturer", "Associate Professor",
  "Professor", "Distinguished Professor", "Emeritus Professor"
)

# Tag every record with its group; roles in none of the lists stay NA.
all$group[all$role %in% staff] <- "staff"
all$group[all$role %in% s.p] <- "students.and.postdocs"
all$group[all$role %in% faculty] <- "faculty"

# Make a summary table: one row per group/year with one count column per
# gender. `table()` yields a long table with a `Freq` column, which is cast
# wide directly (naming `value.var` avoids relying on guessed columns).
group.counts <- as.data.frame(with(all, table(group, gender, year)))
group.counts <- dcast(group.counts, group + year ~ gender, value.var = "Freq")
group.counts$proportion.women <-
  group.counts$woman / (group.counts$man + group.counts$woman)
group.counts$group <- factor(
  group.counts$group,
  levels = c("faculty", "students.and.postdocs", "staff")
)
# `year` comes back from table() as a factor; convert via character so we get
# the year values rather than the factor codes.
group.counts$year <- as.numeric(as.character(group.counts$year))

# Confidence interval (prop.test's default 95% level) for the proportion of
# women in one row of the summary table.
#   r: a one-row data frame with numeric columns `woman` and `man`.
# Returns a length-2 numeric vector: lower and upper bound. A row with nobody
# in it returns NA bounds, because prop.test() stops with an error when the
# total count is zero, and table() does produce such all-zero rows for
# group/year combinations that never occur.
propci <- function(r) {
  if (r$woman + r$man == 0) {
    return(c(NA_real_, NA_real_))
  }
  prop.test(matrix(c(r$woman, r$man), nrow = 1))$conf.int
}

# Apply row by row; adply appends the two bounds as columns V1 and V2.
group.counts <- adply(group.counts, 1, propci)

# One tick per year present in the data.
year_breaks <- sort(unique(group.counts$year))

# Points with CI bars per year, one panel per group; the red line marks parity.
# Line thickness uses `linewidth` (ggplot2 >= 3.4.0), which replaces the
# deprecated `size` for line geoms. Constants are set outside aes().
ggplot(
  group.counts,
  aes(x = year, y = proportion.women, ymin = V1, ymax = V2)
) +
  geom_hline(yintercept = 0.5, colour = "red", alpha = 0.5, linewidth = 2) +
  scale_x_continuous(
    breaks = year_breaks,
    labels = as.character(year_breaks)
  ) +
  geom_point(size = 4) +
  geom_errorbar(width = 0, linewidth = 1) +
  geom_line(alpha = 0.5, linetype = "dashed", linewidth = 1) +
  ylim(c(0, 1)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 20)
  ) +
  facet_wrap(~group, nrow = 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment