Created
January 17, 2024 12:38
-
-
Save haydenflinner/1c713169250170a21b34ecc25e4d4992 to your computer and use it in GitHub Desktop.
Pandas code for modelling live race timing + statistical insight from it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dataclasses import dataclass

import pandas as pd
@dataclass
class PositionSample:
    """One observation of where a rider was running on a given lap."""

    lap: int  # lap number the sample belongs to (the sample data starts at 1)
    rider: str  # rider identifier
    pos: int  # running position on that lap; 1 is the leader


# Short alias so the hand-written sample table stays compact.
Sample = PositionSample
# Toy data set: 5 riders, 4 lap race, one sample per rider per lap.
# Real data should reveal more noise / trends.
df = pd.DataFrame([
    # Lap 1: starting order.
    Sample(1, 'a', 1),
    Sample(1, 'b', 2),
    Sample(1, 'c', 3),
    Sample(1, 'd', 4),
    Sample(1, 'e', 5),
    # Lap 2: only change is at the back of the field.
    Sample(2, 'a', 1),
    Sample(2, 'b', 2),
    Sample(2, 'c', 3),
    Sample(2, 'e', 4),
    Sample(2, 'd', 5),  # <-- d and e traded places
    # Lap 3: b and c drop to the back, d and e move up.
    Sample(3, 'a', 1),
    Sample(3, 'd', 2),
    Sample(3, 'e', 3),
    Sample(3, 'c', 4),  # b took out c and himself. or c took out b and himself.
    Sample(3, 'b', 5),
    # Lap 4: finishing lap, unchanged from the prior lap.
    Sample(4, 'a', 1),
    Sample(4, 'd', 2),
    Sample(4, 'e', 3),
    Sample(4, 'c', 4),
    Sample(4, 'b', 5),
])
# Goal: find out whether running near certain riders correlates with changes
# in position. For example, maybe a certain rider is known to ride a wide bike,
# so being just behind him means you have a below-average chance of passing.
# To really weigh that you'd need to correct for speed with something like ELO,
# or maybe just the finishing position in the current race.
# Another example: having Jett Lawrence behind you is a recipe for losing one
# spot by the end of the lap.
# This may also reveal riders who have a tendency to put other riders far down
# the results sheet.

# Step 1 - per lap, record each rider's immediate neighbours. Within a lap the
# rows are ordered by position, so shifting by one row yields the rider directly
# ahead (NaN for the leader) and shifting by -1 the rider directly behind (NaN
# for the last rider).
df = df.sort_values(by=["lap", "pos"])
df["rider_ahead"] = df.groupby("lap")["rider"].shift()
df["rider_behind"] = df.groupby("lap")["rider"].shift(-1)

# Step 2 - per rider, pull the previous lap's values onto the current lap's
# row. Within a rider the rows are ordered by lap, so a one-row shift is
# "the lap before". Both columns are NaN on a rider's first sampled lap, and
# rider_ahead_last_lap is also NaN when the rider led the previous lap.
df = df.sort_values(by=["rider", "lap"])
df["prev_pos"] = df.groupby("rider")["pos"].shift()
df["rider_ahead_last_lap"] = df.groupby("rider")["rider_ahead"].shift()

# Debugging aid: uncomment to inspect the intermediate table in race order.
# display(df.sort_values(by=["lap", "pos"]))

# Drop rows where there is no previous lap. Not a big loss since first laps are
# especially hectic. The explicit copy makes the result an independent frame,
# so the column assignment below cannot be mistaken for a write into a slice of
# the old frame (older pandas versions may warn about that).
df = df.dropna(subset=["prev_pos"]).copy()

# Negative = places gained since the previous lap, positive = places lost.
# prev_pos was shifted through NaN, so the result is a float column.
df["pos_change"] = df["pos"] - df["prev_pos"]

# The sorts above left the original row labels scrambled; renumber from 0.
df = df.reset_index(drop=True)
# NOTE(review): display() is not defined in this file; it is presumably the
# IPython/Jupyter builtin, i.e. this section expects to run in a notebook.

# Show the finished table in race order. sort_values returns a new frame and
# leaves df untouched, so the result has to be handed to display() to be seen.
display(df.sort_values(by=["lap", "pos"]))

# For each rider X: the largest pos_change (most places lost in one lap) seen
# by anyone who was directly behind X on the previous lap. Rows whose
# rider_ahead_last_lap is NaN (the rider led the previous lap) fall out of the
# grouping, because groupby skips NaN keys by default.
display(df.groupby("rider_ahead_last_lap")["pos_change"].max())

import plotly.express as px

# Distribution of pos_change per "rider ahead last lap". Keep the figure and
# show it explicitly so it renders even when this is not the last expression
# evaluated in a notebook cell.
fig = px.box(df, x='rider_ahead_last_lap', y='pos_change')
fig.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment