Forked from ryanpraski/apple_health_load_analysis_R.r
Created
August 13, 2016 03:31
-
-
Save pbamotra/228e4813cac6e901ec21ac33b1d476da to your computer and use it in GitHub Desktop.
Load an Apple HealthKit export.xml file in R, then analyze and visualize the step-count data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages: dplyr for data manipulation, ggplot2 for plotting, lubridate for
# date-time parsing, XML for reading the Apple Health export.
library(dplyr)
library(ggplot2)
library(lubridate)
library(XML)

# Location of the Apple Health export.xml file; edit this to match your machine.
export_path <- "C:\\Users\\praskry\\Desktop\\apple_health_data\\export.xml"

# xmlParse() falls back to treating its argument as literal XML text when no
# such file exists, which produces a confusing parse error for a wrong path.
# Fail early with a clear message instead.
if (!file.exists(export_path)) {
  stop("Apple Health export not found: ", export_path, call. = FALSE)
}
xml <- xmlParse(export_path)

# Transform the XML into a data frame built from the attributes of the
# <Record> nodes.
# NOTE(review): xmlAttrsToDataFrame() is reached through `:::`, i.e. it is not
# part of the XML package's exported API and may change between versions.
df <- XML:::xmlAttrsToDataFrame(xml["//Record"])
str(df)

# Make `value` numeric. Going through as.character() first means a factor
# column is converted from its labels rather than its integer codes.
df$value <- as.numeric(as.character(df$value))
str(df)

# Parse endDate into a date-time in the US Eastern time zone using lubridate.
df$endDate <- ymd_hms(df$endDate, tz = "America/New_York")
str(df)

# Add calendar columns derived from endDate. The format() results are
# character strings; dayofweek is an unabbreviated weekday label.
df$month <- format(df$endDate, "%m")
df$year <- format(df$endDate, "%Y")
df$date <- format(df$endDate, "%Y-%m-%d")
df$dayofweek <- wday(df$endDate, label = TRUE, abbr = FALSE)
df$hour <- format(df$endDate, "%H")
str(df)
# Total steps per month and year: print the table, then plot it as bars.
monthly_steps <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(year, month) %>%
  summarise(steps = sum(value))

# Print the monthly totals (up to 100 rows).
print(monthly_steps, n = 100)

# Bar chart with one bar per month, dodged and filled by year.
ggplot(monthly_steps, aes(x = month, y = steps, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Box plot of daily step totals for each month, filled by year.
daily_steps_by_month <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(date, month, year) %>%
  summarise(steps = sum(value))

# Print the daily totals (up to 100 rows).
print(daily_steps_by_month, n = 100)

ggplot(daily_steps_by_month, aes(x = month, y = steps)) +
  geom_boxplot(aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals for each month of 2015.

# Daily totals, restricted to 2015. `year` is compared with a number here and
# R coerces the two sides to a common type for the comparison.
daily_steps_2015 <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(date, month, year) %>%
  summarise(steps = sum(value)) %>%
  filter(year == 2015)

# One row per month: centre, spread, extremes and quartiles of the daily totals.
daily_steps_2015 %>%
  group_by(month) %>%
  summarise(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  )
# Box plot of daily step totals for each day of the week, filled by year.
daily_steps_by_weekday <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(dayofweek, date, year) %>%
  summarise(steps = sum(value))

# Print the daily totals with their day of week (up to 100 rows).
print(daily_steps_by_weekday, n = 100)

ggplot(daily_steps_by_weekday, aes(x = dayofweek, y = steps)) +
  geom_boxplot(aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals for each day of the week in 2015,
# sorted from the highest median to the lowest.

# Daily totals, restricted to 2015. `year` is compared with a number here and
# R coerces the two sides to a common type for the comparison.
weekday_steps_2015 <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(dayofweek, date, year) %>%
  summarise(steps = sum(value)) %>%
  filter(year == 2015)

# One row per weekday: centre, spread, extremes and quartiles of daily totals.
weekday_steps_2015 %>%
  group_by(dayofweek) %>%
  summarise(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  ) %>%
  arrange(desc(median))
# Heat map of total steps by hour of day (y axis) and day of week (x axis).
hourly_steps <- df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  # Aggregate straight to (hour, dayofweek). Totalling per date first and then
  # summing those totals yields the same sums with an extra grouping pass.
  group_by(hour, dayofweek) %>%
  summarise(steps = sum(value)) %>%
  # Sort so the printed table lists the busiest hour/weekday cells first.
  arrange(desc(steps))

# Print total steps by hour and day of week (up to 100 rows).
print(hourly_steps, n = 100)

# Tiles shade from white (fewest steps) to red (most steps).
ggplot(hourly_steps, aes(x = dayofweek, y = hour, fill = steps)) +
  geom_tile() +
  scale_fill_continuous(labels = scales::comma, low = "white", high = "red") +
  theme_bw() +
  theme(panel.grid.major = element_blank())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment