Skip to content

Instantly share code, notes, and snippets.

@magnusnissel
Created July 21, 2017 19:02
Show Gist options
  • Save magnusnissel/b486201f7613d03b7b0fd823fb36febd to your computer and use it in GitHub Desktop.
Save magnusnissel/b486201f7613d03b7b0fd823fb36febd to your computer and use it in GitHub Desktop.
Initial basic attempt at lineup parsing (from NBA play-by-play logs)
import os
import glob
import pandas as pd
def check_lineup(r):
    """Flag a row whose away or home lineup still has an empty slot.

    Args:
        r: A row (pandas Series) holding the slot columns ``A1``-``A5`` and
            ``H1``-``H5``. An empty string marks an unfilled slot.

    Returns:
        The same row. When any of the ten slots is empty, ``LINEUP_ERROR`` is
        set to ``True`` and ``LINEUP_STATUS`` to ``"Not enough players"``;
        otherwise the row is returned untouched.
    """
    a_cols = ["A1", "A2", "A3", "A4", "A5"]
    h_cols = ["H1", "H2", "H3", "H4", "H5"]
    # Look for empty slots directly. Collecting a list of True values for the
    # filled slots and passing it to all() can never fail, because such a
    # list only ever contains True (or is empty).
    away_short = any(p == "" for p in r[a_cols])
    home_short = any(p == "" for p in r[h_cols])
    if away_short or home_short:
        r["LINEUP_ERROR"] = True
        r["LINEUP_STATUS"] = "Not enough players"
    return r
def identify_first_open(slots):
    """Return the 1-based position of the first empty slot.

    Args:
        slots: Iterable of slot values; an empty string marks an open slot.

    Returns:
        The 1-based position of the first ``""`` entry. When no slot is
        open, the position of the last slot (i.e. the number of slots) is
        returned, so a full lineup yields its final position and an empty
        ``slots`` yields 0 instead of raising ``UnboundLocalError``.
    """
    position = 0  # stays 0 when slots is empty
    for position, occupant in enumerate(slots, start=1):
        if occupant == "":
            break
    return position
def get_player_slot(p, slots):
    """Return the 1-based position of the first slot equal to *p*.

    Args:
        p: Value to look for.
        slots: Iterable of slot values to search, in order.

    Returns:
        The 1-based position of the first matching slot, or ``None`` when
        no slot equals *p*.
    """
    matches = (
        position
        for position, occupant in enumerate(slots, start=1)
        if occupant == p
    )
    return next(matches, None)
def _seat_player(slots, name):
    """Put *name* into the first empty slot of *slots*; return False if none is free."""
    if "" not in slots:
        return False
    slots[slots.index("")] = name
    return True


def parse_lineups(df, away, home, box_df=None):
    """Annotate every play-by-play row with the players tracked on court.

    The lineup is carried forward row by row. A row whose description
    contains ``"SUB"`` replaces ``PLAYER1_NAME`` with ``PLAYER2_NAME`` in the
    lineup of ``PLAYER1_TEAM_ABBREVIATION``; any other row adds each of
    PLAYER1-3 that is not yet listed to the first empty slot of that
    player's team.

    Args:
        df: Play-by-play frame. Columns read: ``HOMEDESCRIPTION``,
            ``VISITORDESCRIPTION`` and, for PLAYER1-3, ``<P>_NAME`` and
            ``<P>_TEAM_ABBREVIATION``. The frame is not modified.
        away: Team abbreviation whose players fill ``A1``-``A5``.
        home: Team abbreviation whose players fill ``H1``-``H5``.
        box_df: Optional box score with ``START_POSITION``,
            ``TEAM_ABBREVIATION`` and ``PLAYER_NAME``. Rows with a non-null
            ``START_POSITION`` seed the lineups before the first event.

    Returns:
        A copy of *df* with NaN replaced by ``""`` and these columns added:
        ``A1``-``A5``, ``H1``-``H5``, ``DESC``, ``LINEUP_ERROR`` (bool) and
        ``LINEUP_STATUS`` (``""``, ``"Too many players"`` or
        ``"Not enough players"``). Rows with an error are also printed.
    """
    a_cols = ["A1", "A2", "A3", "A4", "A5"]
    h_cols = ["H1", "H2", "H3", "H4", "H5"]
    ln_cols = a_cols + h_cols

    # fillna returns a new frame, so the caller's data is left untouched.
    df = df.fillna("")
    for c in ln_cols:
        df[c] = ""
    df["DESC"] = df["HOMEDESCRIPTION"] + df["VISITORDESCRIPTION"]

    # The running lineup lives in plain lists rather than being re-read from
    # the previous row, so the result does not depend on the frame having a
    # consecutive integer index.
    away_slots = [""] * len(a_cols)
    home_slots = [""] * len(h_cols)
    # home is inserted first so that away wins if both abbreviations are equal.
    slots_by_team = {home: home_slots, away: away_slots}

    if box_df is not None:
        starters = box_df[box_df["START_POSITION"].notnull()]
        for _, starter in starters.iterrows():
            slots = slots_by_team.get(starter["TEAM_ABBREVIATION"])
            if slots is not None:
                # Starters beyond the fifth of a team are ignored.
                _seat_player(slots, starter["PLAYER_NAME"])

    history = []
    errors = []
    statuses = []
    for _, r in df.iterrows():
        status = ""
        if "SUB" in r["DESC"]:
            # P2 enters, P1 leaves. Slots hold player names, so the lookup
            # has to use names as well (an ID would never match).
            slots = slots_by_team.get(r["PLAYER1_TEAM_ABBREVIATION"])
            leaving = r["PLAYER1_NAME"]
            if slots is not None and leaving != "" and leaving in slots:
                slots[slots.index(leaving)] = r["PLAYER2_NAME"]
        else:
            for p in ("PLAYER1", "PLAYER2", "PLAYER3"):
                name = r["{}_NAME".format(p)]
                slots = slots_by_team.get(r["{}_TEAM_ABBREVIATION".format(p)])
                if slots is None or name == "" or name in slots:
                    continue
                if not _seat_player(slots, name):
                    # A sixth player showed up: flag the row and keep the
                    # lineup as it is instead of overwriting the last slot.
                    status = "Too many players"
        if "" in away_slots or "" in home_slots:
            status = "Not enough players"
        history.append(away_slots + home_slots)  # concatenation snapshots the state
        errors.append(status != "")
        statuses.append(status)

    for position, col in enumerate(ln_cols):
        df[col] = [snapshot[position] for snapshot in history]
    df["LINEUP_ERROR"] = errors
    df["LINEUP_STATUS"] = statuses

    for _, r in df.iterrows():
        if r["LINEUP_ERROR"]:
            print(r["LINEUP_STATUS"])
            print(r[ln_cols])
    return df
# Named differently from parse_lineups(df, ...): two module-level functions
# cannot share one name, and this loader has to call the DataFrame version.
def parse_lineup_files(pbp_path, box_path=None, away=None, home=None):
    """Load play-by-play (and optionally box score) CSVs and parse lineups.

    Args:
        pbp_path: Path of the play-by-play CSV; its first column is used as
            the index.
        box_path: Optional path of the box score CSV (first column as index).
        away: Away team abbreviation. Required; it only has a default
            because it follows the optional ``box_path``.
        home: Home team abbreviation. Required, like ``away``.

    Returns:
        The DataFrame produced by ``parse_lineups``.

    Raises:
        ValueError: If ``away`` or ``home`` is not given.
    """
    if away is None or home is None:
        raise ValueError("both 'away' and 'home' team abbreviations are required")
    pdf = pd.read_csv(pbp_path, index_col=0)
    bdf = pd.read_csv(box_path, index_col=0) if box_path is not None else None
    return parse_lineups(pdf, away, home, bdf)


if __name__ == "__main__":
    import sys

    # Usage: script PBP_CSV AWAY HOME [BOX_CSV]
    if len(sys.argv) not in (4, 5):
        sys.exit("usage: {} PBP_CSV AWAY HOME [BOX_CSV]".format(sys.argv[0]))
    cli_box_path = sys.argv[4] if len(sys.argv) == 5 else None
    parse_lineup_files(sys.argv[1], cli_box_path, sys.argv[2], sys.argv[3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment