Last active
June 29, 2021 05:25
-
-
Save boooeee/ab276ad9d4955de318863e6354e7ad8a to your computer and use it in GitHub Desktop.
Code for analyzing and charting your iPhone run data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list = ls(all = TRUE)) | |
library(XML) | |
library(tidyverse) | |
library(lubridate) | |
library(scales) | |
library(ggthemes) | |
library(ggridges) | |
library(rpart) | |
loc<-"" # enter file path location of export.xml file here # | |
xml <- xmlParse(paste0(loc, '/export.xml')) | |
rc <- data.frame(XML:::xmlAttrsToDataFrame(xml["//Record"]),stringsAsFactors = F) | |
# select Nike Run Club data # | |
nk<-rc %>% | |
filter(sourceName=="Nike Run Club",unit == "mi") | |
# format the data # | |
nk <- nk %>% | |
mutate(cdt=as_datetime(creationDate, tz="US/Pacific"),stm=as_datetime(startDate, tz="US/Pacific"),etm=as_datetime(endDate, tz="US/Pacific"),dst=as.numeric(as.character(value))) %>% | |
group_by(creationDate) %>% | |
mutate(cd=cumsum(dst)) %>% # cumulative distance covered # | |
mutate(mntm=min(stm)) %>% # start time for each run #%>% # time differential # | |
mutate(hr=hour(stm),min=minute(stm)) %>% | |
mutate(elt=as.numeric(etm-mntm)) %>% # total elapsed time # | |
mutate(dtm=as.numeric(etm-lag(etm))) %>% | |
ungroup() | |
# split each of your runs into a list element # | |
nkl<-split(nk,nk$creationDate) | |
# analyze your latest run # | |
rn<-nkl[[length(nkl)-0]] | |
# create loess smoothing of run data # | |
ls<-loess(cd ~ elt,data=rn,span=0.1) | |
# create smoothed data points and calculate speed # | |
rn <- rn %>% | |
mutate(prd = predict(ls,rn)) %>% | |
mutate(cdsm=prd-lag(prd), spd=cdsm/dtm, mph=spd*3600) %>% | |
mutate(rcdsm=cd-lag(cd), rspd=rcdsm/dtm, rmph=rspd*3600) | |
# plot your run # | |
# create plot title # | |
tt<-format(rn$cdt[1], format = "Your %B %d, %Y run") | |
# summary stats # | |
miles<-format(rn$cd[nrow(rn)],digits=2,nsmall=1) | |
time<-round(rn$elt[nrow(rn)]/60,0) | |
speed<-format(rn$cd[nrow(rn)]/rn$elt[nrow(rn)]*3600,digits=2,nsmall = 1) | |
pace<-format(rn$elt[nrow(rn)]/rn$cd[nrow(rn)]/60,digits=2,nsmall = 1) | |
# create subtitle # | |
sbt<-paste(miles," miles in ",time," minutes, ",pace," minutes per mile",sep="") | |
# speed over course of your run (the raw and the smoothed) # | |
ggplot(rn,aes(x=elt/60,y=mph)) + | |
geom_path(size=1.5) + | |
theme_minimal(base_size = 15) + | |
labs(x="minutes elapsed",y="miles per hour") + | |
theme(text=element_text(family="Open Sans")) + | |
ggtitle(tt,subtitle = sbt) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) + | |
geom_point(aes(x=elt/60,y=rmph),color="gray") | |
ggsave(filename=paste(loc,"speedrawsmoothed.png",sep="")) | |
# determine run intervals using the CART algorithm # | |
rp<-rpart(mph ~ elt,data=rn,control = rpart.control(cp=0.05)) # adjust the cp parameter if the phases are over or under fit # | |
rn$rpp<-predict(rp,rn) | |
# add intervals and lables to your run dataset # | |
rna<-rn %>% | |
group_by(rpp) %>% | |
summarise(mnt=min(elt),mxt=max(elt),mnd=min(prd),mxd=max(prd)) %>% | |
mutate(dst=mxd-mnd,avt=(mxt+mnt)/2) %>% | |
mutate(gt=paste(format(dst,digits = 1)," miles @ ",format(rpp,digits=2)," mph",sep="")) | |
# speed over course of your run - with interval summaries # | |
ggplot(rn,aes(x=elt/60,y=mph)) + | |
geom_point(color="gray",size=0.5) + | |
geom_line(aes(x=elt/60,y=rpp)) + | |
geom_label(data=rna,aes(x=avt/60,y=rpp,label=gt),color="red",nudge_y = 0.3) + | |
theme_minimal() + | |
labs(x="minutes elapsed",y="miles per hour") + | |
theme(text=element_text(family="Open Sans")) + | |
ggtitle(tt,subtitle = sbt) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) | |
ggsave(filename=paste(loc,"speedphase.png",sep="")) | |
# distance over the course of your run - color coded speed # | |
ggplot(rn,aes(x=elt/60,y=prd)) + | |
geom_path(aes(color=mph),size=3) + | |
scale_color_gradient(low="blue",high="red",name="miles/hour") + | |
theme_minimal() + | |
labs(x="minutes elapsed",y="miles covered") + | |
theme(text=element_text(family="Open Sans")) + | |
ggtitle(tt,subtitle = sbt) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) | |
ggsave(filename=paste(loc,"speedcolor.png",sep="")) | |
# plot speed for your last nine runs # | |
rns<-nkl[(length(nkl)-8):(length(nkl)-0)] | |
# function for creating loess smoothing for each run # | |
ra <- function(x) { | |
ls<-loess(cd ~ elt,data=x,span=0.1) | |
x<-x %>% | |
mutate(prd = predict(ls,x)) %>% | |
mutate(cdsm=c(NA,diff(prd)), spd=cdsm/dtm, mph=spd*3600) | |
return(x) | |
} | |
# apply function over the list of runs # | |
rnl<-lapply(rns,ra) | |
rnd<-do.call('rbind',rnl) | |
# create chart labels # | |
rnd$dtf<-format(rnd$cdt, format = "%B %d, %Y") | |
ru<-unique(rnd[c("cdt","dtf")]) | |
ru<-ru[order(ru$cdt),] | |
rnd$dtf<-factor(rnd$dtf,levels=ru$dtf) | |
# speed over course of your run # | |
ggplot(rnd,aes(x=elt/60,y=mph)) + | |
geom_path() + | |
theme_minimal(base_size=15) + | |
facet_wrap(~dtf) + | |
ggtitle("Speed versus Time for Your Last Nine Runs") + | |
labs(x="minutes elapsed",y="miles per hour") + | |
theme(text=element_text(family="Open Sans")) + | |
#ggtitle(tt,subtitle = sbt) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) | |
ggsave(filename=paste(loc,"speedtime9.png",sep="")) | |
# plot a distribution of when you run during the day # | |
ggplot(nk,aes(x=hr)) + | |
geom_bar(fill="blue") + | |
theme_minimal(base_size=15) + | |
labs(x="hour of the day") + | |
theme(text=element_text(family="Open Sans")) + | |
ggtitle("What time of day are you running?") + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) + | |
theme(axis.title.y=element_blank(), | |
axis.text.y=element_blank(), | |
axis.ticks.y=element_blank()) + | |
scale_x_continuous(labels=c(paste(c(5:11),"AM",sep=""),"12PM",paste(c(1:8),"PM",sep="")),breaks=c(5:20)) | |
ggsave(filename=paste(loc,"timeofday.png",sep="")) | |
# summarize workouts ----- | |
wk <- data.frame(XML:::xmlAttrsToDataFrame(xml["//Workout"]),stringsAsFactors = F) | |
# restrict by date # | |
dtfr<-"2020-07-01" # beginning of date range to analyze | |
dtto<-"2021-06-19" # end of date range to analyze | |
# remove speed outliers - set to very high number if you want to keep all data points # | |
mxnsd<-4 # threshhold for allowed number of standard deviations from mean (for workout speed) # | |
# split your runs into two time periods by choosing a cutoff date below # | |
cutoff<-"2021-01-01" | |
lbl1<-"Jul2020 to Dec2020" # label for dates prior to cutoff # | |
lbl2<-"Jan2021 to Jun2021" # label for dates after cutoff # | |
# summarize and clean workout data # | |
wkr<-wk %>% | |
mutate_at(c("duration","totalDistance","totalEnergyBurned"), as.numeric) %>% | |
mutate_at(c("creationDate","startDate","endDate"), as_datetime) %>% | |
mutate(speed=totalDistance/duration*60) %>% | |
filter(sourceName=="Nike Run Club") %>% # remove/alter for different run app # | |
filter(creationDate>=as_date(dtfr),creationDate<=as_date(dtto)) %>% | |
mutate(month=format(creationDate,format="%b")) %>% | |
mutate(nsd=abs(speed-mean(speed))/sd(speed)) %>% # calculate number of standard deviations from the mean # | |
filter(nsd<=mxnsd) %>% | |
mutate(lbl=ifelse(creationDate<as_datetime(cutoff),lbl1,lbl2)) | |
# fit your runs to the Riegel formula # | |
wkr <- wkr %>% | |
mutate(lnv=log(speed),lnd=log(totalDistance)) | |
# fit Riegel formula with fixed exponent of 0.06 # | |
lm1<-lm(I(lnv-0.06*lnd)~1,data=wkr) | |
# fit Riegel formula with best fit exponent # | |
lm2<-lm(lnv ~ lnd,data=wkr) | |
# fit each subsegment to its own Riegel line # | |
lm11<-lm(I(lnv+0.06*lnd)~1,data=wkr[wkr$lbl==lbl1,]) | |
lm12<-lm(I(lnv+0.06*lnd)~1,data=wkr[wkr$lbl==lbl2,]) | |
lm21<-lm(lnv ~ lnd,data=wkr[wkr$lbl==lbl1,]) | |
lm22<-lm(lnv ~ lnd,data=wkr[wkr$lbl==lbl2,]) | |
wkr <- wkr %>% | |
mutate(prd1=exp(predict(lm1,wkr)-0.06*lnd),prd2=exp(predict(lm2,wkr))) %>% | |
mutate(prd11=exp(predict(lm11,wkr)-0.06*lnd),prd12=exp(predict(lm12,wkr)-0.06*lnd)) %>% | |
mutate(prd21=exp(predict(lm21,wkr)),prd22=exp(predict(lm22,wkr))) %>% | |
mutate(dff1=speed-prd1,dff2=speed-prd2) %>% # identifying best runs - speed over expectation # | |
mutate(dftm1=totalDistance*(1/prd1-1/speed)*60,dftm2=totalDistance*(1/prd2-1/speed)*60) %>% # identifying best runs - seconds below expectation # | |
mutate(rnk1=rank(-dff1),rnk2=rank(-dff2),rnkt1=rank(-dftm1),rnkt2=rank(-dftm2)) | |
wkr$lbl<-factor(wkr$lbl,levels=c(lbl1,lbl2)) | |
# plot speed versus distance - Riegel formula with fixed exponent # | |
ggplot(wkr,aes(x=totalDistance,y=speed,color=lbl)) + | |
geom_point() + | |
scale_color_manual(values=c("red","blue")) + | |
theme_minimal() + | |
geom_line(aes(x=totalDistance,y=prd11),color="red") + | |
geom_line(aes(x=totalDistance,y=prd12),color="blue") + | |
theme(text=element_text(family="Open Sans")) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) + | |
labs(x="distance covered (miles)",y="speed (miles per hour)") + | |
theme(legend.title = element_blank()) + | |
ggtitle("Distance versus speed, fit to Riegel formula",subtitle = "Using fixed exponent - 1.06") | |
ggsave(filename=paste(loc,"riegel.png",sep="")) | |
# plot speed versus distance - Riegel formula with best fit exponent # | |
ggplot(wkr,aes(x=totalDistance,y=speed,color=lbl)) + | |
geom_point() + | |
scale_color_manual(values=c("red","blue")) + | |
theme_minimal() + | |
geom_line(aes(x=totalDistance,y=prd21),color="red") + | |
geom_line(aes(x=totalDistance,y=prd22),color="blue") + | |
theme(text=element_text(family="Open Sans")) + | |
theme(plot.title = element_text(hjust = 0.5)) + | |
theme(plot.subtitle = element_text(hjust = 0.5)) + | |
labs(x="distance covered (miles)",y="speed (miles per hour)") + | |
theme(legend.title = element_blank()) + | |
ggtitle("Distance versus speed, fit to Riegel formula",subtitle = "Exponent represents best fit to data") | |
ggsave(filename=paste(loc,"riegel_bestfit.png",sep="")) | |
# summarize your best runs in a gt table # | |
bm<-3.1 # best time at [bm] miles # | |
bf <-function(x) { | |
# create loess smoothing of run data # | |
ls<-loess(cd ~ elt,data=x,span=0.1) | |
x2 <- x %>% | |
mutate(prd = predict(ls,x)) %>% | |
mutate(cdsm=c(NA,diff(prd)), spd=cdsm/dtm, mph=spd*3600) %>% | |
mutate(rcdsm=c(NA,diff(cd)), rspd=rcdsm/dtm, rmph=rspd*3600) %>% | |
crossing(select(x,ncd=cd,nelt=elt,nstm=stm)) %>% | |
filter(ncd>=cd+bm) %>% | |
group_by(stm) %>% | |
slice_head(n=1) %>% | |
ungroup() %>% | |
mutate(dff=nelt-elt) %>% | |
arrange(elt) %>% | |
slice_head(n=1) %>% | |
select(cdt,stm,nstm,elt,nelt,cd,ncd,dff) %>% | |
mutate(mins=dff/60) | |
return(x2) | |
} | |
fnk<-lapply(nkl,bf) | |
fnkd<-bind_rows(fnk) | |
# sort by fastest runs # | |
fnkd<-arrange(fnkd,mins) | |
library(gt) | |
library(webshot) # load this library if you want to save your table as png file # | |
gtfst <- fnkd %>% | |
mutate(strt=elt/60,endt=nelt/60,spd=ncd/dff*3600,cdt=as_date(cdt)) %>% | |
select(-stm,-nstm,-cd,-elt,-nelt,-dff,-ncd) %>% | |
relocate(mins, .after=endt) %>% | |
slice_head(n=10) %>% | |
gt() %>% | |
tab_header(title=paste0("Your 10 fastest ",bm," mile runs")) %>% | |
fmt_date(columns=cdt, date_style=3) %>% | |
fmt_number(columns=c(mins,strt,endt),decimals = 1,pattern="{x} minutes") %>% | |
fmt_number(columns=spd,decimals=2,pattern="{x} mph") %>% | |
cols_label(cdt="workout date",mins="run time",strt="start time",endt="end time",spd="speed") %>% | |
cols_align(columns=c(strt:spd),align="center") | |
gtsave(gtfst,file="fastest_table.png", path = loc) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment