Skip to content

Instantly share code, notes, and snippets.

@gdbassett
Last active June 26, 2017 20:02
Show Gist options
  • Save gdbassett/0fdeb37705bae4477d3034c0033323cb to your computer and use it in GitHub Desktop.
Save gdbassett/0fdeb37705bae4477d3034c0033323cb to your computer and use it in GitHub Desktop.
Basic R code to parse LiveSplit splits into a data frame
# Read the splits file and convert the XML tree into nested R lists.
speedrun <- XML::xmlParse("/livesplit.lss")
speedrun <- XML::xmlToList(speedrun)

# Build one row per recorded segment time: the attempt id, the raw RealTime
# string, and the name of the split the time belongs to.
chunk <- do.call(rbind, lapply(speedrun[["Segments"]], function(segments) {
  history_rows <- lapply(segments[["SegmentHistory"]], function(segment) {
    # Entries without a RealTime element carry no usable time. The implicit
    # NULL result is dropped by rbind() below.
    if ("RealTime" %in% names(segment)) {
      data.frame(
        # [[ ]] extracts the bare id value instead of a named vector.
        attemptID = segment$.attrs[["id"]],
        RealTime = segment$RealTime
      )
    }
  })
  segments.df <- do.call(rbind, history_rows)
  # A split with no timed history produces NULL here; skip it rather than
  # failing on nrow(NULL) when attaching the split name.
  if (is.null(segments.df)) {
    return(NULL)
  }
  segments.df$name <- rep(segments$Name, nrow(segments.df))
  segments.df
}))
# Turn every colon-separated RealTime string into a difftime measured in
# seconds: first field * 60 * 60 + second field * 60 + third field.
chunk$RealTime <- as.difftime(
  vapply(
    stringr::str_split(chunk$RealTime, ":"),
    function(fields) {
      as.double(fields[1]) * 60 * 60 +
        as.double(fields[2]) * 60 +
        as.double(fields[3])
    },
    numeric(1)
  ),
  units = "secs"
)
# Keep the splits in order of first appearance rather than alphabetical order.
split_order <- unique(chunk$name)
chunk$name <- factor(chunk$name, levels = split_order)
# Add column n: the number of recorded times for each split.
chunk <- ungroup(mutate(group_by(chunk, name), n = n()))
# One facet label per split ("<name> - <n> attempts"), named by the split so
# it can be handed to as_labeller() in the faceted plots.
labels <- setNames(
  unique(paste0(chunk$name, " - ", chunk$n, " attempts")),
  unique(chunk$name)
)
# plot distribution of split attempts
# One density curve per split, stacked in a single column of facets.
chunk %>%
  # Plot plain numeric seconds (as the improvement plot does) instead of
  # handing a difftime column to a continuous scale.
  mutate(RealTime = as.double(RealTime, units = "secs")) %>%
  ggplot() +
  geom_density(aes(x = RealTime)) +
  # `scales` is spelled out in full; the previous `scale=` only worked through
  # partial argument matching.
  facet_wrap(~name, ncol = 1, scales = "free_y", labeller = as_labeller(labels)) +
  labs(
    x = "Time in seconds",
    y = "Likelihood of time",
    title = "Likelihood of getting a given time for each split"
  ) +
  # Start the time axis at zero; the upper limit is taken from the data.
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_minimal() +
  theme(
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_text(hjust = 0),
    axis.text.y = element_blank(),
    title = element_text(hjust = 0),
    # Only vertical major grid lines are kept, drawn dotted.
    panel.grid.major.x = element_line(linetype = "dotted")
  )
# Plot improvement over time
# Per split: time against attempt number, with a linear trend on a log10 axis.
chunk %>%
  # Go through as.character() so the real attempt ids are used even when the
  # column is a factor (as.integer() on a factor returns level codes).
  mutate(attemptID = as.integer(as.character(attemptID))) %>%
  mutate(RealTime = as.double(RealTime, units = "secs")) %>%
  ggplot() +
  geom_line(aes(x = attemptID, y = RealTime), alpha = 0.6) +
  geom_smooth(aes(x = attemptID, y = RealTime), method = "lm") +
  # `scales` is spelled out in full; the previous `scale=` only worked through
  # partial argument matching.
  facet_wrap(~name, ncol = 1, scales = "free_y", labeller = as_labeller(labels)) +
  labs(y = "Time in seconds", x = "Attempt", title = "Attempt vs time") +
  # attemptID is numeric here, so it needs a continuous position scale.
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_log10(expand = c(0, 0)) +
  theme_minimal() +
  theme(
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_text(hjust = 0),
    # axis.text.y = element_blank(),
    title = element_text(hjust = 0),
    # Only vertical major grid lines are kept, drawn dotted.
    panel.grid.major.x = element_line(linetype = "dotted")
  )
# group by split
# One row per split: `data` holds that split's rows, `dist` a kernel density
# estimate of its times in seconds.
chunk2 <- tidyr::nest(group_by(chunk, name))
chunk2 <- mutate(
  chunk2,
  dist = lapply(data, function(split_rows) {
    seconds <- as.double(split_rows$RealTime, units = "secs")
    density(seconds)
  })
)
# sample from splits
# Simulate 10000 complete runs: for each run, draw one recorded time (in
# seconds) from every split independently and add the draws together.
dist.sim <- vapply(seq_len(10000), function(run) {
  sum(vapply(chunk2$data, function(df) {
    times <- as.double(df$RealTime, units = "secs")
    # Draw by index. sample(times, 1) would treat a single recorded time
    # t >= 1 as the range 1:t and return an integer from it instead of t.
    times[sample.int(length(times), 1)]
  }, numeric(1)))
}, numeric(1))
# total true splits
# Sum the split times of each attempt, then keep only attempts that have as
# many recorded rows as there are split levels.
dist.true <- summarize(
  group_by(chunk, attemptID),
  tot = sum(as.double(RealTime, units = "secs")),
  complete = nlevels(name) == length(name)
)
dist.true <- filter(dist.true, complete)
dist.true <- select(dist.true, -complete)
# calculate sum of best times
# Fastest recorded time per split, in seconds.
val.min <- summarize(
  group_by(chunk, name),
  min = min(as.double(RealTime, units = "secs"))
)
# Add the per-split minima together; colSums() returns the total named "min".
val.min <- colSums(select(ungroup(val.min), min))
# plot true vs simulated splits
# Overlay the density of completed-run totals (green) and simulated totals
# (blue), and mark the sum of best split times with a red vertical line.
ggplot() +
  geom_density(data = dist.true, mapping = aes(x = tot), color = "green") +
  geom_density(
    data = data.frame(tot = dist.sim),
    mapping = aes(x = tot),
    color = "blue"
  ) +
  geom_vline(mapping = aes(xintercept = val.min), color = "red") +
  labs(
    title = "Simulated (Blue) vs Actual (green) vs Sum of Best (red)\nTotal Run Time",
    x = "Time in Seconds",
    y = "Likelihood of time"
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_minimal() +
  theme(
    title = element_text(hjust = 0),
    axis.title = element_text(hjust = 0),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    # Drop every grid line except the vertical major ones, which are dotted.
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(linetype = "dotted")
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment