databyjp’s gists

databyjp / 20220427_playoff_gameviz_init_viz.py

Created April 28, 2022 14:16

	import plotly.express as px
	# Bubble chart of shot profiles: one facet row per "group", bubble size taken
	# from "shot_freq_x" and bubble colour from "shot_acc_x".
	# NOTE(review): the label keys 'pts_pct_x' and 'shot_ev' are not plotted by
	# this call, while "shot_freq_x" / "shot_acc_x" have no label entry - confirm
	# this is intended.
	axis_labels = {
	    'filt_avg': 'Distance from the rim',
	    'segment': 'Sample size',
	    'pts_pct_x': 'Proportion of points',
	    'shot_ev': 'Expected<BR>points<BR>per shot',
	}
	fig = px.scatter(
	    shot_blot_df,
	    x="filt_avg",
	    y="segment",
	    size="shot_freq_x",
	    color="shot_acc_x",
	    color_continuous_scale=px.colors.sequential.Blues,
	    facet_row="group",
	    labels=axis_labels,
	    title=f'{latest_day_str} - Playoff game shot profiles',
	    template="plotly_white",
	    width=1200,
	    height=750,
	)

databyjp / 20220403_shotblot_comp_loop.py

Created April 8, 2022 17:31

	import pandas as pd
	import utils
	from scipy.spatial.distance import cosine
	from nba_api.stats.static import teams

	# Load the shots table and derive gdf from it via the project helpers.
	shots_df = utils.load_shots_df()
	gdf = utils.get_shot_dist_df(shots_df)

	# Keep only rows whose "timeActual" calendar date equals the most recent
	# date present in the data.
	shot_dates = shots_df["timeActual"].dt.date
	latest_date = shot_dates.max()
	day_df = shots_df[shot_dates == latest_date]

databyjp / build_shot_dists_w_rolling_window.py

Created March 17, 2022 13:57

	# Build more granular shot bins by filtering shots on a window of
	# "shotDistance" and aggregating each window per "shot_zone".
	# NOTE(review): this listing is a truncated preview and the loop body is
	# shown without indentation; the visible lines never advance filt_start or
	# append to gdf_list - presumably that happens in the part not shown.
	# Lower edge of the distance window (units not shown here - TODO confirm).
	filt_start = 0
	# Window width: rows with filt_start <= shotDistance < filt_start + filt_width.
	filt_width = 2
	# Step size; only used below to derive the number of iterations.
	filt_inc = 0.25
	gdf_list = list()
	# 1 + int(30 * (1 / 0.25)) = 121 iterations with the values above.
	for i in range(1 + int(30 * (1 / filt_inc))):
	# Half-open window: lower bound inclusive, upper bound exclusive.
	filt_df = shots_df[(shots_df["shotDistance"] >= filt_start) & (shots_df["shotDistance"] < filt_start + filt_width)]
	# Per zone: sum of "shot_made" and count of non-null "period" values.
	gdf = filt_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
	# The "period" count is renamed to "shot_atts".
	gdf = gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
	# Tag every row with the constant team label "NBA".
	gdf = gdf.assign(team="NBA")

databyjp / plot_init_shot_dists.py

Created March 17, 2022 12:48

	# Team display order: teams sorted by their summed "rel_pts", ascending.
	# Select the column before aggregating so only "rel_pts" is summed; calling
	# .sum() on the whole grouped frame also aggregates every other column
	# (including non-numeric ones) just to throw the results away.
	tm_ranks = gdf_tot.groupby("team")["rel_pts"].sum().sort_values().index.to_list()
	# One bar per team, one facet row per shot zone; bar height is "shot_freq" and
	# colour is "rel_ev" on a diverging scale centred on 0.
	fig = px.bar(
	    gdf_tot,
	    x="team",
	    y="shot_freq",
	    facet_row="shot_zone",
	    color="rel_ev",
	    color_continuous_scale=px.colors.diverging.RdYlBu_r,
	    color_continuous_midpoint=0,
	    category_orders={"team": tm_ranks},
	)
	# Thin dark outline around every bar.
	fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
	fig.show()

databyjp / shot_dist_profile_by_team.py

Last active March 17, 2022 12:33


	# ========================================
	# Now let's do the same for each team
	# ========================================
	# Cast "teamId" to int in place before grouping by team.
	shots_df["teamId"] = shots_df["teamId"].astype(int)
	gdf_list = list()
	# NOTE(review): truncated preview with the loop body shown unindented; the
	# visible lines never append to gdf_list - presumably done further down.
	for tm_id in shots_df.teamId.unique():
	# Rows belonging to the current team only.
	tm_df = shots_df[shots_df.teamId == tm_id]
	# Per zone: sum of "shot_made" and count of non-null "period" values.
	tm_gdf = tm_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
	# The "period" count is renamed to "shot_atts".
	tm_gdf = tm_gdf.reset_index().rename({"period": "shot_atts"}, axis=1)

databyjp / group_shot_dists.py

Created March 17, 2022 12:09

	# Shot profile over all rows of shots_df, labelled team="NBA": per shot zone,
	# the made-shot total, the attempt count ("period" count renamed to
	# "shot_atts"), each zone's share of all attempts and its make rate.
	gdf = (
	    shots_df.groupby("shot_zone")
	    .agg({"shot_made": "sum", "period": "count"})
	    .reset_index()
	    .rename(columns={"period": "shot_atts"})
	    .assign(team="NBA")
	    .assign(shot_freq=lambda zones: zones["shot_atts"] / zones["shot_atts"].sum())
	    .assign(shot_acc=lambda zones: zones["shot_made"] / zones["shot_atts"])
	)

databyjp / Shot dists.py

Created March 17, 2022 12:04


	import logging
	import pandas as pd
	import numpy as np
	import utils
	from nba_api.stats.static import players
	from nba_api.stats.static import teams
	import plotly.express as px

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

databyjp / utils.py

Created February 25, 2022 03:42

Add on-court players columns

	def add_pbp_oncourt_columns(df):
	"""
	Add on-court player columns to the play-by-play dataframe.
	Players based on substitution data and box-score data (for starters)
	:param df: PBP dataframe
	:return:
	"""
	df = df.sort_values(["GAME_ID", "actionNumber"])
	df = df.reset_index(drop=True)

databyjp / utils.py

Created February 25, 2022 01:07

Fetch data with gameID

	def fetch_data_w_gameid(json_dir, gm_id, datatype="boxscore"):
	"""
	Download a datafile based on gameID as downloaded from NBA API & saves to file
	:param json_dir: Directory for saving downloaded JSON
	:param gm_id: NBA game ID
	:param datatype: What data types to download - determines endpoint to use
	:return:
	"""
	from nba_api.stats.endpoints import boxscoreadvancedv2
	from nba_api.live.nba.endpoints import playbyplay

databyjp / tmp_rename_file.py

Created February 22, 2022 08:50

Had to rename a bunch of files

	import os
	# One-off fix-up: prefix every ".json" file in srcdir with "00", skipping
	# names that already start with "00" so a re-run does not prefix them twice.
	srcdir = "dl_data/box_scores/json"
	files = [
	    f
	    for f in os.listdir(srcdir)
	    if f.endswith('.json') and not f.startswith('00')
	]
	for f in files:
	    old_path = os.path.join(srcdir, f)
	    new_path = os.path.join(srcdir, '00' + f)
	    os.rename(old_path, new_path)

JP Hwang databyjp