Skip to content

Instantly share code, notes, and snippets.

import plotly.express as px
# Display titles for the axes/legend, keyed by dataframe column name.
axis_labels = {
    'filt_avg': 'Distance from the rim',
    'segment': 'Sample size',
    'pts_pct_x': 'Proportion of points',
    'shot_ev': 'Expected<BR>points<BR>per shot',
}

# Bubble chart with one facet row per "group": x is "filt_avg", y is
# "segment", bubble size is "shot_freq_x" and bubble colour is "shot_acc_x".
fig = px.scatter(
    shot_blot_df,
    x="filt_avg",
    y="segment",
    size="shot_freq_x",
    color="shot_acc_x",
    color_continuous_scale=px.colors.sequential.Blues,
    facet_row="group",
    labels=axis_labels,
    title=f'{latest_day_str} - Playoff game shot profiles',
    template="plotly_white",
    width=1200,
    height=750,
)
import pandas as pd
import utils
from scipy.spatial.distance import cosine
from nba_api.stats.static import teams
# Load the shot log and derive the shot-distance summary via the project helpers.
shots_df = utils.load_shots_df()
gdf = utils.get_shot_dist_df(shots_df)

# Keep only the shots whose calendar date equals the most recent date present.
shot_dates = shots_df["timeActual"].dt.date
latest_date = shot_dates.max()
day_df = shots_df[shot_dates == latest_date]
# Settings for building more granular shot bins.
# A shot falls in the current bin when
# filt_start <= shotDistance < filt_start + filt_width.
filt_start, filt_width = 0, 2
# The loop below runs 1 + int(30 * (1 / filt_inc)) times.
filt_inc = 0.25
# Empty list created ahead of the binning loop.
gdf_list = []
for i in range(1 + int(30 * (1 / filt_inc))):
filt_df = shots_df[(shots_df["shotDistance"] >= filt_start) & (shots_df["shotDistance"] < filt_start + filt_width)]
gdf = filt_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
gdf = gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
gdf = gdf.assign(team="NBA")
# Order teams by their summed "rel_pts" (ascending); this ordering drives the
# x-axis of the chart. The column is selected BEFORE aggregating so that only
# "rel_pts" is summed instead of every column of gdf_tot.
tm_ranks = gdf_tot.groupby("team")["rel_pts"].sum().sort_values().index.to_list()

# One row of bars per shot zone: bar height is "shot_freq", bar colour is
# "rel_ev" on a diverging scale whose midpoint is pinned at 0.
fig = px.bar(
    gdf_tot,
    x="team",
    y="shot_freq",
    facet_row="shot_zone",
    color="rel_ev",
    color_continuous_scale=px.colors.diverging.RdYlBu_r,
    color_continuous_midpoint=0,
    category_orders={"team": tm_ranks},
)
# Outline each bar with a thin dark line.
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
fig.show()
# ========================================
# Now let's do the same for each team
# ========================================
# Cast the team IDs to int (in place, on the shared frame) before the
# per-team filtering that follows compares against each unique id.
shots_df["teamId"] = shots_df["teamId"].astype(int)
# Rebind gdf_list to a new empty list.
# NOTE(review): presumably collects the per-team frames; the code that fills
# it is not visible here - confirm.
gdf_list = list()
for tm_id in shots_df.teamId.unique():
tm_df = shots_df[shots_df.teamId == tm_id]
tm_gdf = tm_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
tm_gdf = tm_gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
# League-wide shot profile: per shot zone, sum the made shots and count the
# rows (the "period" count is renamed to "shot_atts").
zone_totals = shots_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
gdf = zone_totals.reset_index().rename(columns={"period": "shot_atts"})

# Label these rows as the league aggregate.
gdf = gdf.assign(team="NBA")

# shot_freq: the zone's share of all attempts; shot_acc: makes per attempt.
total_attempts = gdf.shot_atts.sum()
gdf = gdf.assign(
    shot_freq=gdf.shot_atts / total_attempts,
    shot_acc=gdf.shot_made / gdf.shot_atts,
)
import logging
import pandas as pd
import numpy as np
import utils
from nba_api.stats.static import players
from nba_api.stats.static import teams
import plotly.express as px
logger = logging.getLogger(__name__)
@databyjp
databyjp / utils.py
Created February 25, 2022 03:42
Add on-court player columns
def add_pbp_oncourt_columns(df):
"""
Add on-court player columns to the play-by-play dataframe.
Players based on substitution data and box-score data (for starters)
:param df: PBP dataframe
:return:
"""
df = df.sort_values(["GAME_ID", "actionNumber"])
df = df.reset_index(drop=True)
@databyjp
databyjp / utils.py
Created February 25, 2022 01:07
Fetch data with gameID
def fetch_data_w_gameid(json_dir, gm_id, datatype="boxscore"):
"""
Download a datafile based on gameID as downloaded from NBA API & saves to file
:param json_dir: Directory for saving downloaded JSON
:param gm_id: NBA game ID
:param datatype: What data types to download - determines endpoint to use
:return:
"""
from nba_api.stats.endpoints import boxscoreadvancedv2
from nba_api.live.nba.endpoints import playbyplay
@databyjp
databyjp / tmp_rename_file.py
Created February 22, 2022 08:50
Had to rename a bunch of files
import os

# Directory that holds the JSON files to rename.
srcdir = "dl_data/box_scores/json"

# Collect the .json files whose names do not already start with '00'.
files = []
for fname in os.listdir(srcdir):
    if not fname.endswith('.json'):
        continue
    if fname.startswith('00'):
        continue
    files.append(fname)

# Prefix each collected file name with '00', renaming it in place.
for f in files:
    old_path = os.path.join(srcdir, f)
    new_path = os.path.join(srcdir, '00' + f)
    os.rename(old_path, new_path)