Created
November 22, 2018 01:07
-
-
Save mcohen01/9df22e5819dca2d0ba5b2a6eb87b4732 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract per-rider lap times from the text of 'laps.pdf' and plot each
# rider's sorted lap times to 'lap_times_11.20.2018.png'.
import re
import warnings
from collections import defaultdict

import PyPDF2
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
warnings.filterwarnings('ignore')

# Surnames looked for as whole space-separated tokens in the PDF text.
# 'ESPARGARO' is listed twice; the duplicate has no effect on membership tests.
riders = [
    'MARQUEZ', 'ROSSI', 'DOVIZIOSO', 'BRADL', 'MORBIDEL',
    'ESPARGARO', 'PETRUCCI', 'ESPARGARO', 'PIRRO', 'BAGNAI',
    'RINS', 'NAKAGAMI', 'MILLER', 'MIR', 'RABAT', 'ZARCO',
    'LORENZO', 'IANNONE', 'ABRAHAM', 'SMITH', 'FOLGER', 'QUARTARARO',
    'SYAHRIN', 'OLIVEIRA', 'VIÑALES'
]
rider_names = frozenset(riders)

# A lap time looks like 1'32.456: one minute, 20-39 seconds, milliseconds.
# The class is [23]; the earlier "[2,3]" also accepted a literal comma, which
# would later break the float() conversion of the captured time.
lap_time_pattern = r"1'[23][0-9]\.[0-9]{3}"
lap_time_re = re.compile(lap_time_pattern)
# Some lines carry the lap time glued to a 'dT4' prefix; capture just the time.
funky_lap_re = re.compile('dT4(' + lap_time_pattern + ')')

# This exact line is treated as a header: 'VIÑALES' on it must not switch the
# current rider, and the first lap-time token found after it is discarded.
header_line = ', 2018Maverick VIÑALES'

laps = defaultdict(list)
# Lap list of the most recently seen rider; None until a rider name is found.
# It deliberately carries over from one page to the next.
current_laps = None

# NOTE(review): PdfFileReader / numPages / getPage / extractText are the legacy
# PyPDF2 names; presumably newer releases renamed them - confirm against the
# installed version.
with open('laps.pdf', 'rb') as pdf_file:
    fileReader = PyPDF2.PdfFileReader(pdf_file)
    for page_index in range(fileReader.numPages):
        page_lines = fileReader.getPage(page_index).extractText().split('\n')
        skip_next = False  # reset at the start of every page
        for line in page_lines:
            is_header = line == header_line
            if is_header:
                skip_next = True

            # Line-level check, done once per line so a line with several
            # tokens cannot record the same lap repeatedly.
            funky_match = funky_lap_re.match(line)
            if funky_match and current_laps is not None:
                current_laps.append(funky_match.group(1))

            for token in line.split(' '):
                if token in rider_names and not is_header:
                    # defaultdict creates the rider's list on first sight, so
                    # dict order is the order riders first appear in the PDF.
                    current_laps = laps[token]
                match = lap_time_re.match(token)
                if not match:
                    continue
                if skip_next:
                    skip_next = False
                elif current_laps is not None:
                    # A time seen before any rider name cannot be attributed
                    # to anyone, so it is dropped instead of raising NameError.
                    current_laps.append(match.group(0))

fig, ax = plt.subplots(figsize=(13, 9))
max_riders = 9
max_laps_per_rider = 45
plotted = 0
for name, raw_times in laps.items():
    # Presumably skipped because two riders share this surname (it appears
    # twice in `riders`), so their laps end up merged under one key.
    if name == 'ESPARGARO':
        continue
    if plotted == max_riders:
        break
    # Drop the leading "1'" and add the minute back to get seconds, then keep
    # only the fastest `max_laps_per_rider` laps in ascending order.
    times = sorted(
        round(float(lap[2:]) + 60, 3) for lap in raw_times
    )[:max_laps_per_rider]
    ax.plot(times, label=name)
    plotted += 1

ax.set_xlabel('laps')
ax.set_ylabel('lap time')
ax.legend()
fig.savefig('lap_times_11.20.2018.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment