Skip to content

Instantly share code, notes, and snippets.

@halhen
Last active July 17, 2018 13:20
Show Gist options
  • Save halhen/47bc8f482b18f2d81dd405ec25a70d51 to your computer and use it in GitHub Desktop.
Save halhen/47bc8f482b18f2d81dd405ec25a70d51 to your computer and use it in GitHub Desktop.
The daily grind - viz
library(tidyverse)
# data from https://www.kaggle.com/bls/american-time-use-survey
df.resp <- read_csv('../data/atus/atusresp.csv')
df.act <- read_csv('../data/atus/atusact.csv', col_types=cols(tustarttim = col_character(), tustoptime = col_character()))
df.sum <- read_csv('../data/atus/atussum.csv')
df.tmp <- df.act %>%
mutate(activity = case_when(trtier2p == 1301 ~ 'Exercise',
trtier1p == 5 ~ 'Work',
trtier1p == 6 ~ 'Education',
trtier1p == 18 ~ 'Travel',
trtier1p == 11 ~ 'Eating',
trcodep %in% c(120303, 120304) ~ 'TV',
trcodep == 120307 ~ 'Playing games',
trtier1p == 12 ~ 'Relaxing',
trtier2p >= 301 & trtier2p <= 303 ~ 'Childcare',
trtier2p == 101 ~ 'Sleep',
trtier2p == 202 ~ 'Cooking',
trtier2p == 201 ~ 'Housework')) %>%
filter(!is.na(activity)) %>%
inner_join(df.resp, by='tucaseid') %>%
inner_join(df.sum %>% select(tucaseid, teage), by='tucaseid') %>%
separate(tustarttim, into=c('hour', 'minute', 'second')) %>%
mutate(hour = as.integer(hour),
minute = as.integer(minute)) %>%
mutate(start.epoch = hour * 60 + minute,
end.epoch = (start.epoch + tuactdur24)) %>%
# Roll over midnight; when this happens add one point before midnight and one after
{
rbind(filter(., end.epoch >= 24*60) %>%
mutate(start.epoch = 0,
end.epoch = end.epoch - 24*60),
mutate(., end.epoch = pmin(end.epoch, 24*60-1)))
} %>%
mutate(activity = factor(activity),
age = teage)
# For every by= minute interval, figure out what people where doing
# Then normalize by day and group by weekday/weekend
interval <- 30
df.tmp2 <- expand.grid(epoch = seq(0, 24*60-1, by=interval)) %>%
as.tibble() %>%
rowwise() %>%
do({
search <- .
df.time <- df.tmp %>%
filter(start.epoch <= search$epoch, end.epoch > search$epoch) %>%
group_by(age, activity, tudiaryday) %>%
summarize(score = sum(tufnwgtp),
epoch = search$epoch)
df.time
}) %>%
group_by(age, tudiaryday) %>%
mutate(score = score / max(score)) %>%
group_by(daytype = ifelse(tudiaryday >= 2 & tudiaryday <= 6, 'weekday', 'weekend'),
activity, epoch, age) %>%
summarize(score = sum(score)/n_distinct(tudiaryday))
instructions <- 'Early afternoon travel is teens getting home from school, while working adults get on the road a couple of hours later. Weekday exercise happens after school or work,
until you retire and get out more in the morning. But lunch happens at lunch no matter what age or day... Although it is quicker on weekdays.
Each slice represents an age group doing each activity throughout the day. Time flows left to right and age flows front to back.
To fit both common and rare activities, they are on different scales. Relative height makes sense within each activity.'
df.tmp2 %>%
filter(age <= 76) %>%
# Merge every second age, to get a good amount of lines
group_by(daytype, activity, epoch, age = ceiling(age/2)*2) %>%
summarize(score = sum(score)) %>%
ungroup() %>%
# Add a new data point for 24:00, which equals 00:00
rbind(filter(., epoch == 0) %>% mutate(epoch=24*60)) %>%
# Align start of chart to be 04:00, to fit the format of the data
mutate(epoch = ifelse(epoch < 4*60, epoch + 24*60, epoch)) %>%
# Add missing datapoints, as well as a start and end point for the polygon to be complete
complete(daytype, activity, epoch=c(epoch, min(epoch)-0.001, max(epoch) + 0.001), age, fill=list(score=0)) %>%
# Polygons need to be arranged properly to overlap as we'd like them to
# Here we're definitely being "creative" with ggplot
arrange(daytype, activity, epoch, age) %>%
mutate(age.f = reorder(as.character(age), -age)) %>%
# facet headings
mutate(group_name = paste0(activity, ' - ', daytype)) %>%
# Calculate each points y pos
mutate(y = age + 150 * score/max(score)) %>%
{
ggplot(., aes(epoch/60, y)) +
geom_polygon(aes(group=age.f, fill=activity, alpha=ifelse(activity %in% c('Education', 'Exercise'), 0, 1)), color='white', show.legend=FALSE, size=0.3) +
geom_line(data=filter(., daytype == 'weekday', activity %in% c('Education', 'Exercise'), age <= 20), aes(group=age.f), color='white', linetype='dotted', alpha=0.3) +
# Put an invisible dot at the activity's top y value, to make each activity share y scales
# between weekend and weekday
geom_point(data=group_by(., activity, group_name) %>%
summarize(y=max(y),
epoch=min(epoch)) %>%
mutate(y=max(y)), color=NA) +
facet_wrap(~group_name, scales='free', ncol=6) +
scale_y_continuous(breaks=1:5*15, labels=function(x) {paste0(x, 'yo')}) +
scale_x_continuous(labels=function(x) {sprintf("%02d:00", as.integer(x %% 24))}, breaks=c(4, 8, 12, 16, 20, 24, 28)) +
scale_alpha_continuous(range=c(0.7, 1)) +
labs(x="", y="", title="The daily grind", subtitle=instructions, caption="Source: American Time Use Survey") +
theme_linedraw(base_family='Arial Narrow') +
theme(panel.grid = element_blank(),
strip.text=element_text(size=12),
plot.title=element_text(size=30),
plot.subtitle=element_text(face='italic', size=14),
plot.caption=element_text(face='italic'),
axis.text = element_text(color='#666666'))
}
ggsave('/tmp/out.png', width=17, height=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment