Last active
June 26, 2016 03:43
-
-
Save mmparker/54f1fd05e62671f80dfb4b855231e6ee to your computer and use it in GitHub Desktop.
A workflow for creating a plot that shows sleep times and wake times as a bar for each day. Reference plot: https://pbs.twimg.com/media/Cl1Bk-JUgAAZrEX.jpg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup | |
options(stringsAsFactors = FALSE) | |
library(lubridate) | |
library(ggplot2) | |
################################################################################ | |
# Simulating fake data - this is just for my benefit, really | |
################################################################################ | |
# How many consecutive nights of fake data to generate
n_days <- 100

# Bedtimes: one timestamp per day starting from an arbitrary evening, each
# jittered by normally distributed noise (sd = 30 minutes, in seconds)
sleepytimes <- data.frame(
  start_sleep = seq(
    from = as.POSIXct("2016-06-25 22:30"),
    by = "1 day",
    length.out = n_days
  ) + rnorm(n = n_days, sd = 60 * 30)
)

# Append one extra night where sleep starts after midnight (two days past the
# latest simulated date), so the later code is exercised on that scenario too
sleepytimes <- rbind(
  sleepytimes,
  data.frame(
    start_sleep = as.POSIXct(
      paste(max(date(sleepytimes$start_sleep)) + 2, "02:17:23")
    )
  )
)
# Add whole hours and leftover minutes of sleep for every row.
# Draw a random sleep length in decimal hours (mean 8), round it to whole
# minutes, then split that total into hours and minutes. Splitting the rounded
# total (rather than rounding the fractional part on its own) keeps
# sleep_minutes in 0-59: rounding the fraction alone can produce 60 minutes.
# The number of draws follows the current row count, so it stays correct if
# rows are added to or removed from the simulation above.
sleepytimes[c("sleep_hours", "sleep_minutes")] <- local({
  total_minutes <- round(rnorm(n = nrow(sleepytimes), mean = 8) * 60)
  list(total_minutes %/% 60, total_minutes %% 60)
})
################################################################################ | |
# The code you'll actually want to test on your data | |
################################################################################ | |
# Wake time = sleep start plus the time slept. The hours and minutes columns
# are each turned into a lubridate duration, summed, and added to start_sleep.
sleepytimes$end_sleep <- sleepytimes$start_sleep + (
  dhours(sleepytimes$sleep_hours) + dminutes(sleepytimes$sleep_minutes)
)
# Now, to plot... you're totally right that crossing zero is going to be a
# problem if we treat it like a datetime. So I think the easiest thing to do
# is to subtract midnight from the sleep and wake times...
# First, associate every sleep period with a particular date, even if sleep
# didn't start until after midnight. Rule of thumb: if sleep starts before
# 12pm on Tuesday, it's counted for Monday night.
# The comparison yields 1 for starts before noon and 0 otherwise; subtracting
# that from the calendar date shifts only the before-noon starts back one day
# and keeps the result a Date (no ifelse()/as.Date() round trip needed). A
# start at exactly 12:00-12:59 is NOT before noon, so it stays on its own date.
sleepytimes$night_of <- with(
  sleepytimes,
  date(start_sleep) - as.integer(hour(start_sleep) < 12)
)
# Express bedtime and wake time as hours relative to the midnight that follows
# night_of (midnight minus the timestamp). Bedtimes will usually be positive
# (before midnight) and wake times will usually be negative (after midnight).
# The reference midnight is built once and reused for both columns.
sleepytimes[c("start_sleep_diff", "end_sleep_diff")] <- local({
  midnight <- as.POSIXct(paste(sleepytimes$night_of + 1, "00:00:00"))
  lapply(
    sleepytimes[c("start_sleep", "end_sleep")],
    function(clock_time) {
      as.numeric(difftime(time1 = midnight, time2 = clock_time, units = "hours"))
    }
  )
})
# Plot settings.
# Gap left on each side of a night's bar, as a fraction of one day. Adjust to
# taste; 0.1 is a reasonable starting point.
bar_spacing <- 0.1
# Span passed to loess for the two trend lines; changing it changes how smooth
# they are. 0.1 is a reasonable starting point here as well.
smooth_span <- 0.1
# To get actual time labels, we need a function that will take our y values
# and return actual times.
#
# x:       numeric vector of "hours until midnight" values (positive = before
#          midnight, negative = after midnight), as used on the plot's y axis.
# returns: character vector the same length as x with 12-hour clock labels,
#          e.g. 2 -> "10:00 PM", -7 -> "07:00 AM", -6.5 -> "06:30 AM".
#          NA inputs give NA labels.
label_hours <- function(x) {
  # Anchor on a fixed midnight in UTC instead of today's local midnight, so the
  # labels never shift on a daylight-saving transition day.
  midnight <- as.POSIXct("2000-01-01 00:00:00", tz = "UTC")
  # Reverse the "time to midnight" calculation: subtract the offset (rounded to
  # whole minutes to avoid floating-point truncation) from midnight, then show
  # hours (%I), minutes (%M) and the AM/PM indicator (%p).
  format(midnight - round(x * 60) * 60, "%I:%M %p", tz = "UTC")
}
# Plot it!
# One bar per night running from bedtime to wake time, plus a smoothed trend
# line through the bedtimes and another through the wake times.
ggplot(sleepytimes) +
  # geom_rect needs all four edges of the rectangle. xmin and xmax use the
  # night_of variable (which is a Date), inset by bar_spacing on each side;
  # ymin and ymax use the "time to midnight" variables.
  geom_rect(
    aes(
      xmin = night_of + bar_spacing,
      xmax = night_of + (1 - bar_spacing),
      ymin = start_sleep_diff,
      ymax = end_sleep_diff
    ),
    fill = "#2b8cbe"
  ) +
  # Smoothed (loess) trend line for bedtimes...
  geom_smooth(
    aes(x = night_of, y = start_sleep_diff),
    method = loess, method.args = list(span = smooth_span),
    se = FALSE, color = "#045a8d"
  ) +
  # ...and for wake times
  geom_smooth(
    aes(x = night_of, y = end_sleep_diff),
    method = loess, method.args = list(span = smooth_span),
    se = FALSE, color = "#045a8d"
  ) +
  # Labels. The plot title must be passed as `title`; `main` is a base-graphics
  # argument that ggplot2 does not use, so the title would not be drawn.
  labs(
    x = "Date",
    y = "Time",
    title = "Sleep and Wake Times"
  ) +
  # Use scale_y_continuous to apply the label_hours function we made
  scale_y_continuous(labels = label_hours) +
  # A nicer theme than the default
  theme_bw()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Lesson learned: when naming datasets, think carefully about how much you'll regret a whimsical choice after typing it 300 times.