Last active
July 17, 2018 13:20
-
-
Save halhen/47bc8f482b18f2d81dd405ec25a70d51 to your computer and use it in GitHub Desktop.
The daily grind - viz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# data from https://www.kaggle.com/bls/american-time-use-survey | |
df.resp <- read_csv('../data/atus/atusresp.csv') | |
df.act <- read_csv('../data/atus/atusact.csv', col_types=cols(tustarttim = col_character(), tustoptime = col_character())) | |
df.sum <- read_csv('../data/atus/atussum.csv') | |
df.tmp <- df.act %>% | |
mutate(activity = case_when(trtier2p == 1301 ~ 'Exercise', | |
trtier1p == 5 ~ 'Work', | |
trtier1p == 6 ~ 'Education', | |
trtier1p == 18 ~ 'Travel', | |
trtier1p == 11 ~ 'Eating', | |
trcodep %in% c(120303, 120304) ~ 'TV', | |
trcodep == 120307 ~ 'Playing games', | |
trtier1p == 12 ~ 'Relaxing', | |
trtier2p >= 301 & trtier2p <= 303 ~ 'Childcare', | |
trtier2p == 101 ~ 'Sleep', | |
trtier2p == 202 ~ 'Cooking', | |
trtier2p == 201 ~ 'Housework')) %>% | |
filter(!is.na(activity)) %>% | |
inner_join(df.resp, by='tucaseid') %>% | |
inner_join(df.sum %>% select(tucaseid, teage), by='tucaseid') %>% | |
separate(tustarttim, into=c('hour', 'minute', 'second')) %>% | |
mutate(hour = as.integer(hour), | |
minute = as.integer(minute)) %>% | |
mutate(start.epoch = hour * 60 + minute, | |
end.epoch = (start.epoch + tuactdur24)) %>% | |
# Roll over midnight; when this happens add one point before midnight and one after | |
{ | |
rbind(filter(., end.epoch >= 24*60) %>% | |
mutate(start.epoch = 0, | |
end.epoch = end.epoch - 24*60), | |
mutate(., end.epoch = pmin(end.epoch, 24*60-1))) | |
} %>% | |
mutate(activity = factor(activity), | |
age = teage) | |
# For every by= minute interval, figure out what people where doing | |
# Then normalize by day and group by weekday/weekend | |
interval <- 30 | |
df.tmp2 <- expand.grid(epoch = seq(0, 24*60-1, by=interval)) %>% | |
as.tibble() %>% | |
rowwise() %>% | |
do({ | |
search <- . | |
df.time <- df.tmp %>% | |
filter(start.epoch <= search$epoch, end.epoch > search$epoch) %>% | |
group_by(age, activity, tudiaryday) %>% | |
summarize(score = sum(tufnwgtp), | |
epoch = search$epoch) | |
df.time | |
}) %>% | |
group_by(age, tudiaryday) %>% | |
mutate(score = score / max(score)) %>% | |
group_by(daytype = ifelse(tudiaryday >= 2 & tudiaryday <= 6, 'weekday', 'weekend'), | |
activity, epoch, age) %>% | |
summarize(score = sum(score)/n_distinct(tudiaryday)) | |
instructions <- 'Early afternoon travel is teens getting home from school, while working adults get on the road a couple of hours later. Weekday exercise happens after school or work, | |
until you retire and get out more in the morning. But lunch happens at lunch no matter what age or day... Although it is quicker on weekdays. | |
Each slice represents an age group doing each activity throughout the day. Time flows left to right and age flows front to back. | |
To fit both common and rare activities, they are on different scales. Relative height makes sense within each activity.' | |
df.tmp2 %>% | |
filter(age <= 76) %>% | |
# Merge every second age, to get a good amount of lines | |
group_by(daytype, activity, epoch, age = ceiling(age/2)*2) %>% | |
summarize(score = sum(score)) %>% | |
ungroup() %>% | |
# Add a new data point for 24:00, which equals 00:00 | |
rbind(filter(., epoch == 0) %>% mutate(epoch=24*60)) %>% | |
# Align start of chart to be 04:00, to fit the format of the data | |
mutate(epoch = ifelse(epoch < 4*60, epoch + 24*60, epoch)) %>% | |
# Add missing datapoints, as well as a start and end point for the polygon to be complete | |
complete(daytype, activity, epoch=c(epoch, min(epoch)-0.001, max(epoch) + 0.001), age, fill=list(score=0)) %>% | |
# Polygons need to be arranged properly to overlap as we'd like them to | |
# Here we're definitely being "creative" with ggplot | |
arrange(daytype, activity, epoch, age) %>% | |
mutate(age.f = reorder(as.character(age), -age)) %>% | |
# facet headings | |
mutate(group_name = paste0(activity, ' - ', daytype)) %>% | |
# Calculate each points y pos | |
mutate(y = age + 150 * score/max(score)) %>% | |
{ | |
ggplot(., aes(epoch/60, y)) + | |
geom_polygon(aes(group=age.f, fill=activity, alpha=ifelse(activity %in% c('Education', 'Exercise'), 0, 1)), color='white', show.legend=FALSE, size=0.3) + | |
geom_line(data=filter(., daytype == 'weekday', activity %in% c('Education', 'Exercise'), age <= 20), aes(group=age.f), color='white', linetype='dotted', alpha=0.3) + | |
# Put an invisible dot at the activity's top y value, to make each activity share y scales | |
# between weekend and weekday | |
geom_point(data=group_by(., activity, group_name) %>% | |
summarize(y=max(y), | |
epoch=min(epoch)) %>% | |
mutate(y=max(y)), color=NA) + | |
facet_wrap(~group_name, scales='free', ncol=6) + | |
scale_y_continuous(breaks=1:5*15, labels=function(x) {paste0(x, 'yo')}) + | |
scale_x_continuous(labels=function(x) {sprintf("%02d:00", as.integer(x %% 24))}, breaks=c(4, 8, 12, 16, 20, 24, 28)) + | |
scale_alpha_continuous(range=c(0.7, 1)) + | |
labs(x="", y="", title="The daily grind", subtitle=instructions, caption="Source: American Time Use Survey") + | |
theme_linedraw(base_family='Arial Narrow') + | |
theme(panel.grid = element_blank(), | |
strip.text=element_text(size=12), | |
plot.title=element_text(size=30), | |
plot.subtitle=element_text(face='italic', size=14), | |
plot.caption=element_text(face='italic'), | |
axis.text = element_text(color='#666666')) | |
} | |
ggsave('/tmp/out.png', width=17, height=10) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment