Created
July 14, 2016 21:21
-
-
Save ryanpraski/f2a472e02a08f1e4172ffbe04cca9cd1 to your computer and use it in GitHub Desktop.
Analyze and Visualize Apple Health Kit Steps Data using R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used below: dplyr (group_by/summarize/filter/arrange and %>%),
# ggplot2 (plots), lubridate (ymd_hms, wday).
library(dplyr)
library(ggplot2)
library(lubridate)
# Load the step-count export into a data frame.
# NOTE(review): the path below is machine-specific; point it at your own
# StepCount.csv before running.
# stringsAsFactors = FALSE keeps text columns as character on every R version
# (it only became the default in R 4.0).
dfsteps <- read.csv(
  "C:\\Users\\praskry\\Desktop\\apple_health_data\\StepCount.csv",
  stringsAsFactors = FALSE
)
str(dfsteps)

# Convert endDate to a POSIXct date-time with lubridate, in US Eastern time.
dfsteps$endDate <- ymd_hms(dfsteps$endDate, tz = "America/New_York")
str(dfsteps)

# Add calendar columns derived from endDate. The format() calls produce
# character columns; wday() with label = TRUE, abbr = FALSE produces the
# full weekday name.
dfsteps$month <- format(dfsteps$endDate, "%m")
dfsteps$year <- format(dfsteps$endDate, "%Y")
dfsteps$date <- format(dfsteps$endDate, "%Y-%m-%d")
dfsteps$dayofweek <- wday(dfsteps$endDate, label = TRUE, abbr = FALSE)
dfsteps$hour <- format(dfsteps$endDate, "%H")
str(dfsteps)
# Total steps per month within each year: print the summary table, then draw
# a dodged bar chart (months on the x axis, one bar per year).
dfsteps %>%
  group_by(year, month) %>%
  summarize(steps = sum(value)) %>%
  # Show up to 100 rows of the table; print() hands its input back, so the
  # pipeline carries on into ggplot().
  print(n = 100) %>%
  ggplot(aes(x = month, y = steps, fill = year)) +
  geom_bar(position = "dodge", stat = "identity") +
  # Thousands separators on the step axis.
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Distribution of daily step totals per month, one box per year.
dfsteps %>%
  # Sum to one row per calendar date; month and year are kept as grouping
  # columns so they remain available for plotting.
  group_by(date, month, year) %>%
  summarize(steps = sum(value)) %>%
  # Show up to 100 rows of the daily totals, then continue into the plot.
  print(n = 100) %>%
  ggplot(aes(x = month, y = steps)) +
  geom_boxplot(aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals, per month, for 2015.
dfsteps %>%
  # First collapse the raw records to one total per calendar date.
  group_by(date, month, year) %>%
  summarize(steps = sum(value)) %>%
  # year was built with format(), so it is character: compare to a string.
  filter(year == "2015") %>%
  group_by(month) %>%
  summarize(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  )
# Distribution of daily step totals per day of week, one box per year.
dfsteps %>%
  # Sum to one row per calendar date, keeping its weekday and year.
  group_by(dayofweek, date, year) %>%
  summarize(steps = sum(value)) %>%
  # Print the table of steps by day of week, date and year (up to 100 rows),
  # then continue into the plot.
  print(n = 100) %>%
  ggplot(aes(x = dayofweek, y = steps)) +
  geom_boxplot(aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals, per day of week, for 2015,
# ordered from highest to lowest median.
dfsteps %>%
  # First collapse the raw records to one total per calendar date.
  group_by(dayofweek, date, year) %>%
  summarize(steps = sum(value)) %>%
  # year was built with format(), so it is character: compare to a string.
  filter(year == "2015") %>%
  group_by(dayofweek) %>%
  summarize(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  ) %>%
  arrange(desc(median))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment