Skip to content

Instantly share code, notes, and snippets.

# Build more granular shot bins: aggregate makes and attempts per shot zone
# for shots whose distance falls inside [filt_start, filt_start + filt_width).
filt_start = 0    # lower (inclusive) edge of the shot-distance window
filt_width = 2    # window width; the upper edge is exclusive
filt_inc = 0.25   # in the lines visible here, only used to size the loop below
gdf_list = []
# 1 + 30 / filt_inc iterations, i.e. 121 with the values above.
for i in range(1 + int(30 * (1 / filt_inc))):
    # Keep only the shots whose distance lies inside the current window.
    filt_df = shots_df[
        (shots_df["shotDistance"] >= filt_start)
        & (shots_df["shotDistance"] < filt_start + filt_width)
    ]
    # Per zone: sum of shot_made, and a row count (taken on "period")
    # that is renamed to shot_atts.
    gdf = filt_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
    gdf = gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
    gdf = gdf.assign(team="NBA")
    # NOTE(review): this excerpt ends here. Nothing visible advances
    # filt_start or appends to gdf_list -- confirm against the full script.
# Team ordering for the x axis: teams sorted ascending by their summed
# "rel_pts" across all rows of gdf_tot.
tm_ranks = (
    gdf_tot.groupby("team")
    .sum()["rel_pts"]
    .sort_values()
    .index
    .to_list()
)

# One bar-chart row per shot zone; bar height is shot_freq and bar colour is
# rel_ev on a diverging scale centred on zero.
fig = px.bar(
    gdf_tot,
    x="team",
    y="shot_freq",
    facet_row="shot_zone",
    color="rel_ev",
    color_continuous_scale=px.colors.diverging.RdYlBu_r,
    color_continuous_midpoint=0,
    category_orders={"team": tm_ranks},
)
# Thin dark outline around every bar.
fig.update_traces(marker={"line": {"width": 1, "color": "DarkSlateGrey"}})
fig.show()
# ========================================
# Now let's do the same for each team
# ========================================
# Cast team ids to int before filtering on them.
shots_df["teamId"] = shots_df["teamId"].astype(int)
gdf_list = []
for tm_id in shots_df.teamId.unique():
    # Shots taken by this team only.
    tm_df = shots_df[shots_df.teamId == tm_id]
    # Per zone: sum of shot_made, and a row count (taken on "period")
    # that is renamed to shot_atts.
    tm_gdf = tm_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
    tm_gdf = tm_gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
    # NOTE(review): this excerpt ends here. Nothing visible appends tm_gdf
    # to gdf_list -- confirm against the full script.
# League-wide totals per shot zone: sum of shot_made plus a row count
# (taken on "period") exposed as shot_atts, tagged with team="NBA".
gdf = (
    shots_df.groupby("shot_zone")
    .agg({"shot_made": "sum", "period": "count"})
    .reset_index()
    .rename(columns={"period": "shot_atts"})
    .assign(team="NBA")
)
# shot_freq: this zone's share of all attempts.
# shot_acc: makes divided by attempts within the zone.
gdf = gdf.assign(
    shot_freq=gdf.shot_atts / gdf.shot_atts.sum(),
    shot_acc=gdf.shot_made / gdf.shot_atts,
)
import logging
import pandas as pd
import numpy as np
import utils
from nba_api.stats.static import players
from nba_api.stats.static import teams
import plotly.express as px
logger = logging.getLogger(__name__)
@databyjp
databyjp / utils.py
Created February 25, 2022 03:42
Add on-court players columns
def add_pbp_oncourt_columns(df):
    """
    Add on-court player columns to the play-by-play dataframe.

    Players are based on substitution data and box-score data (for starters).

    NOTE(review): only the ordering step is visible in this excerpt; the
    logic that builds the on-court columns and the return statement are not
    shown -- confirm against the full function.

    :param df: PBP dataframe; it is sorted on its "GAME_ID" and
        "actionNumber" columns, so both must be present
    :return: not visible in this excerpt
    """
    # Order plays by game and then by action number, and renumber the rows
    # from zero. Both calls return new frames, so the caller's dataframe is
    # not modified by these two lines.
    df = df.sort_values(["GAME_ID", "actionNumber"])
    df = df.reset_index(drop=True)
@databyjp
databyjp / utils.py
Created February 25, 2022 01:07
Fetch data with gameID
def fetch_data_w_gameid(json_dir, gm_id, datatype="boxscore"):
    """
    Download a data file for one game from the NBA API and save it to file.

    NOTE(review): only the imports are visible in this excerpt; the download
    and save logic is not shown -- confirm against the full function.

    :param json_dir: Directory for saving downloaded JSON
    :param gm_id: NBA game ID
    :param datatype: What data type to download - determines endpoint to use
    :return: not visible in this excerpt
    """
    # Imported at call time rather than at module import time.
    from nba_api.stats.endpoints import boxscoreadvancedv2
    from nba_api.live.nba.endpoints import playbyplay
@databyjp
databyjp / tmp_rename_file.py
Created February 22, 2022 08:50
Had to rename a bunch of files
import os

# One-off script: prefix every JSON file in srcdir with "00" unless its name
# already starts with "00".
srcdir = "dl_data/box_scores/json"
# Take the listing up front so the renames below do not affect what is iterated.
files = [f for f in os.listdir(srcdir) if f.endswith('.json') and not f.startswith('00')]
for f in files:
    os.rename(os.path.join(srcdir, f), os.path.join(srcdir, '00' + f))
# Shade the rectangle spanning x in [rtg_min, rtg_mid] and
# y in [rtg_mid, rtg_max]; it is drawn beneath the data with no outline.
fig.add_shape(
    type="rect",
    x0=rtg_min,
    x1=rtg_mid,
    y0=rtg_mid,
    y1=rtg_max,
    layer="below",
    line_width=0,
    opacity=0.25,
    fillcolor="LightSeaGreen",
)
fig.add_shape(type="rect",
x0=rtg_mid, y0=rtg_min, x1=rtg_max, y1=rtg_mid,
fillcolor="LightSteelBlue",
fig = px.scatter(df, x="DEF_RATING", y="OFF_RATING",
color="legend",
title=f"Game-by-game performances by the {highlight_tm} - '21-'22",
color_discrete_sequence=["#eeeeee", color_dict[highlight_tm][0], color_dict[highlight_tm][1]],
size="inv_days_since_game",
size_max=18,
template="plotly_white",
width=1200, height=800,
labels={"OFF_RATING": "Offensive Rating (higher is better)",
"DEF_RATING": "Dffensive Rating (to the left is better)",