Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created June 11, 2018 10:30
Show Gist options
  • Save thoughtfulbloke/2116a5b8700c02350907dcc83308bab1 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/2116a5b8700c02350907dcc83308bab1 to your computer and use it in GitHub Desktop.
Family groups and income deciles from NZ HES
# Read the Table 9 CSV (path is relative to the working directory).
# Text columns are kept as character vectors rather than factors.
HES9 <- read.csv(
  file = "household-income-and-houseing-statistics-ye-june2-17-csv-tables-corrected/HES2017-Table9.csv",
  stringsAsFactors = FALSE
)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(ggbeeswarm)
library(ggthemes)
#' Expand household counts into one row per (rounded) unit.
#'
#' @param df A data frame with columns `d2` (the value to repeat) and
#'   `households` (a count; rounded to the nearest whole number before use).
#' @return A data frame with one row per repeated unit and three columns:
#'   `disagg` (the repeated `d2` value), `adj` (an offset between -0.47 and
#'   0.47 that grows with row position) and `decil` (`disagg + adj`).
#'   Has zero rows when every count rounds to zero.
atomise <- function(df) {
  units <- data.frame(disagg = rep(df$d2, round(df$households, 0)))
  n_units <- nrow(units)
  # seq_len() rather than 1:n — when the counts round to zero, 1:0 is c(1, 0)
  # and assigning two values to a zero-row data frame is an error.
  position <- seq_len(n_units) / n_units
  # Relative position is rounded to tenths, then scaled to +/-0.47.
  units$adj <- round(position, 1) * .94 - 0.47
  units$decil <- units$disagg + units$adj
  units
}
# Rows with Rcode "HHC06", labelled "One parent households" below.
# The Est1..Est10 columns are reshaped to one row per Year and decile, then
# each household count is expanded into individual rows by atomise().
one_parent <- HES9 %>%
  filter(Rcode == "HHC06") %>%
  select(Year, Est1:Est10) %>%
  gather(key = est_col, value = households, Est1:Est10, convert = TRUE) %>%
  filter(!is.na(households)) %>%
  transmute(
    Year,
    households,
    # Column names "Est1".."Est10" carry the decile number.
    decile = as.numeric(gsub("Est", "", est_col)),
    # atomise() reads d2 from the nested data; decile itself is a grouping key.
    d2 = decile
  ) %>%
  group_by(Year, decile) %>%
  nest() %>%
  mutate(atoms = map(data, atomise)) %>%
  select(-data) %>%
  unnest(atoms) %>%
  ungroup() %>%
  mutate(household_type = "One parent households")
# Rows with Rcodes HHC02-HHC05, labelled "Two parent households" below.
# Counts are summed per Year and decile, expanded to one row per unit with
# atomise(), stacked with one_parent, and drawn as a two-facet dot plot.
HES9 %>%
  filter(Rcode %in% c("HHC02", "HHC03", "HHC04", "HHC05")) %>%
  select(Year, Est1:Est10) %>%
  gather(key = dectext, value = households, Est1:Est10, convert = TRUE) %>%
  filter(!is.na(households)) %>%
  # Collapse the four Rcode rows into a single count per Year and decile.
  group_by(Year, dectext) %>%
  summarise(households = sum(households)) %>%
  ungroup() %>%
  mutate(
    decile = as.numeric(gsub("Est", "", dectext)),
    # atomise() reads d2 from the nested data; decile itself is a grouping key.
    d2 = decile
  ) %>%
  select(-dectext) %>%
  group_by(Year, decile) %>%
  nest() %>%
  mutate(dec = map(data, atomise)) %>%
  select(-data) %>%
  unnest(dec) %>%
  ungroup() %>%
  mutate(household_type = "Two parent households") %>%
  bind_rows(one_parent) %>%
  ggplot(aes(x = Year, y = decil, colour = as.factor(decile))) +
  geom_quasirandom(width = 0.41, size = 0.2) +
  theme_tufte() +
  facet_wrap(~ household_type, nrow = 2) +
  # A single layer draws all eleven decile boundaries (0.5, 1.5, ..., 10.5)
  # instead of one geom_hline() call per boundary.
  geom_hline(yintercept = seq(0.5, 10.5, by = 1), colour = "#EEEEEE") +
  ylab("decile") +
  ggtitle("HES income deciles by family type,\n1 dot=1000 households") +
  scale_y_continuous(breaks = 1:10) +
  theme(legend.position = "none")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment