Last active
April 30, 2023 21:08
-
-
Save ryanpraski/ba9baee2583cfb1af88ca4ec62311a3d to your computer and use it in GitHub Desktop.
Load Apple Health Kit export.xml file in R then analyze and visualize Steps Data using R. See the full post here: http://www.ryanpraski.com/apple-health-data-how-to-export-analyze-visualize-guide/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(ggplot2) | |
library(lubridate) | |
library(XML) | |
#load apple health export.xml file | |
xml <- xmlParse("C:\\Users\\praskry\\Desktop\\apple_health_data\\export.xml") | |
#transform xml file to data frame - select the Record rows from the xml file | |
df <- XML:::xmlAttrsToDataFrame(xml["//Record"]) | |
str(df) | |
#make value variable numeric | |
df$value <- as.numeric(as.character(df$value)) | |
str(df) | |
#make endDate in a date time variable POSIXct using lubridate with eastern time zone | |
df$endDate <-ymd_hms(df$endDate,tz="America/New_York") | |
str(df) | |
##add in year month date dayofweek hour columns | |
df$month<-format(df$endDate,"%m") | |
df$year<-format(df$endDate,"%Y") | |
df$date<-format(df$endDate,"%Y-%m-%d") | |
df$dayofweek <-wday(df$endDate, label=TRUE, abbr=FALSE) | |
df$hour <-format(df$endDate,"%H") | |
str(df) | |
#show steps by month by year using dplyr then graph using ggplot2 | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(year,month) %>% | |
summarize(steps=sum(value)) %>% | |
#print table steps by month by year | |
print (n=100) %>% | |
#graph data by month by year | |
ggplot(aes(x=month, y=steps, fill=year)) + | |
geom_bar(position='dodge', stat='identity') + | |
scale_y_continuous(labels = scales::comma) + | |
scale_fill_brewer() + | |
theme_bw() + | |
theme(panel.grid.major = element_blank()) | |
#boxplot data by month by year | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(date,month,year) %>% | |
summarize(steps=sum(value)) %>% | |
#print table steps by date by month by year | |
print (n=100) %>% | |
ggplot(aes(x=month, y=steps)) + | |
geom_boxplot(aes(fill=(year))) + | |
scale_fill_brewer() + | |
theme_bw() + | |
theme(panel.grid.major = element_blank()) | |
#summary statistics by month for 2015 | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(date,month,year) %>% | |
summarize(steps=sum(value)) %>% | |
filter(year==2015) %>% | |
group_by(month) %>% | |
summarize(mean = round(mean(steps), 2), sd = round(sd(steps), 2), | |
median = round(median(steps), 2), max = round(max(steps), 2), | |
min = round(min(steps), 2),`25%`= quantile(steps, probs=0.25), | |
`75%`= quantile(steps, probs=0.75)) | |
#boxplot data by day of week year | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(dayofweek,date,year) %>% | |
summarize(steps=sum(value)) %>% | |
#print table steps by date by month by year | |
print (n=100) %>% | |
ggplot(aes(x=dayofweek, y=steps)) + | |
geom_boxplot(aes(fill=(year))) + | |
scale_fill_brewer() + | |
theme_bw() + | |
theme(panel.grid.major = element_blank()) | |
#summary statistics by day of week for 2015 | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(dayofweek,date,year) %>% | |
summarize(steps=sum(value)) %>% | |
filter(year==2015) %>% | |
group_by(dayofweek) %>% | |
summarize(mean = round(mean(steps), 2), sd = round(sd(steps), 2), | |
median = round(median(steps), 2), max = round(max(steps), 2), | |
min = round(min(steps), 2),`25%`= quantile(steps, probs=0.25), | |
`75%`= quantile(steps, probs=0.75)) %>% | |
arrange(desc(median)) | |
#heatmap day of week hour of day | |
df %>% | |
filter(type == 'HKQuantityTypeIdentifierStepCount') %>% | |
group_by(date,dayofweek,hour) %>% | |
summarize(steps=sum(value)) %>% | |
group_by(hour,dayofweek) %>% | |
summarize(steps=sum(steps)) %>% | |
arrange(desc(steps)) %>% | |
#print table steps by date by month by year | |
print (n=100) %>% | |
ggplot(aes(x=dayofweek, y=hour, fill=steps)) + | |
geom_tile() + | |
scale_fill_continuous(labels = scales::comma, low = 'white', high = 'red') + | |
theme_bw() + | |
theme(panel.grid.major = element_blank()) |
how can i export data to xml file??
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great work! Do you know how to format the data for an interday file that has summary variables for steps, resting heart rate, etc. as well as a file that has intraday file that has heart rate, steps, etc. every 10 minutes?