Created
February 25, 2017 23:20
-
-
Save mcohen01/13df19b7f4184ec1e7298aa4cbd0bcb0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(dplyr) | |
| library(readr) | |
| library(reshape2) | |
| library(stringr) | |
| library(ggplot2) | |
| library(pdftools) | |
| setwd('/Users/mcohen/Downloads/Scratch/codeplay/R/motogp/lap_times') | |
| pattern <- "[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}" | |
| rider_name_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z]+' | |
| rider_name_no_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z\u00f1\u00d1]+' | |
| rider_no_pattern <- '\\([0-9]+\\)' | |
| all_lines <- function(pdf, pages) { | |
| unlist( | |
| sapply(1:pages, function(index) { | |
| strsplit(pdf[index], '\n')[[1]] | |
| }) | |
| ) | |
| } | |
| rider_from_line <- function(line, is_left) { | |
| pattern <- if (is_left) { | |
| str_c('^', rider_name_no_pattern) | |
| } else { | |
| str_c(rider_name_no_pattern, '$') | |
| } | |
| match <- gregexpr(pattern, line)[[1]] | |
| start <- match[1] | |
| end <- attr(match, 'match.length') + match[1] | |
| full_name <- substr(line, start, end) | |
| full_name <- strsplit(full_name, ' ')[[1]] | |
| if (full_name[2] == 'Andrea') { | |
| str_c(full_name[2], substr(full_name[3], 1, 1)) | |
| } else { | |
| full_name[2] | |
| } | |
| } | |
| lap_time <- function(s) { | |
| m <- strsplit(as.character(s), '\\:') | |
| s <- strsplit(m[[1]][2], '\\.') | |
| (as.numeric(m[[1]][1]) * 60) + | |
| as.numeric(s[[1]][1]) + | |
| (as.numeric(s[[1]][2])/1000) | |
| } | |
| parse.times <- function(line) { | |
| pattern <- "\\s{1}[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}" | |
| matches <- gregexpr(pattern, line)[[1]] | |
| exists <- length(matches) > 1 || matches[[1]] > -1 | |
| matches <- if (exists) unlist(matches) else 0 | |
| if (length(matches) > 0 && matches[1] > 0) { | |
| left <- matches[matches < 30] | |
| right <- matches[matches < 80 & matches > 60] | |
| f <- function(x) substr(line, x + 1, x + 8) | |
| list(sapply(left, f), sapply(right, f)) | |
| } | |
| } | |
| column_laps <- function(lines, rider, is_left, lap_no) { | |
| pattern <- if (is_left) { | |
| str_c('^', rider_name_no_pattern) | |
| } else { | |
| str_c(rider_name_no_pattern, '$') | |
| } | |
| rider_line <- grep(pattern, lines) | |
| laps <- data.frame(rider=NA, time=NA, lap=NA) | |
| for (i in seq_along(lines)) { | |
| if (length(rider_line) > 0 && i == rider_line) { | |
| rider <- rider_from_line(lines[i], is_left) | |
| lap_no <- 1 | |
| } | |
| column <- if (is_left) 1 else 2 | |
| time <- unlist(parse.times(lines[i])[[column]]) | |
| if (! is.null(time)) { | |
| laps <- add_row(laps, rider = rider, | |
| time = lap_time(time), | |
| lap = lap_no) | |
| lap_no <- lap_no + 1 | |
| } | |
| } | |
| laps[-1, ] | |
| } | |
| page_laps <- function(page, rider, lap_no) { | |
| lines <- strsplit(page, '\n')[[1]] | |
| left <- column_laps(lines, rider, T, lap_no) | |
| rider <- tail(left$rider, 1) | |
| lap_no <- tail(left$lap, 1) | |
| right <- column_laps(lines, rider, F, lap_no) | |
| rbind(left, right) | |
| } | |
| all_laps <- function(pdf, pages) { | |
| lines <- strsplit(pdf[1], '\n')[[1]] | |
| line <- grep(str_c('^', rider_name_no_pattern), lines) | |
| rider <- rider_from_line(lines[line], T) | |
| laps <- page_laps(pdf[1], rider, 1) | |
| for (i in seq(2, pages)) { | |
| rider <- tail(laps$rider, 1) | |
| lap_no <- tail(laps$lap, 1) | |
| page <- page_laps(pdf[i], rider, lap_no) | |
| laps <- rbind(laps, page) | |
| } | |
| laps | |
| } | |
| plot <- function(data, show_points=F) { | |
| g <- ggplot(data) | |
| if (length(colnames(data)) == 3) { | |
| g <- g + geom_line(aes(x=lap, y = time, color=rider)) | |
| } else { | |
| g <- g + geom_line(aes(x=lap, y = time), color='steelblue') | |
| } | |
| if (show_points) { | |
| g <- g + geom_point(aes(x=lap, y = time, color=rider)) | |
| } | |
| print(g) | |
| } | |
| plot_laps <- function(laps, topK_riders) { | |
| filter(laps, time < max_time) %>% | |
| filter(time > min_time) %>% | |
| filter(rider %in% unique(laps$rider)[1:topK_riders]) %>% | |
| arrange(rider, time) %>% | |
| group_by(rider) %>% | |
| mutate(lap = row_number()) %>% | |
| plot() | |
| } | |
| main <- function(topK_riders) { | |
| pdf <- pdf_text(paste(data_dir, '/', analysis, sep = '')) | |
| pages <- pdf_info(paste(data_dir, '/', analysis, sep = ''))$pages | |
| all_laps(pdf, pages) %>% | |
| plot_laps(topK_riders) | |
| } | |
| data_dir <- '20170217' | |
| analysis <- 'analysis.pdf' | |
| min_time <- 85 | |
| max_time <- 95 | |
| main(10) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment