databyjp’s gists

databyjp / build_shot_dists_w_rolling_window.py

Created March 17, 2022 13:57

	# Build more granular shot bins
	# Each pass keeps the shots whose distance falls in the half-open window
	# [filt_start, filt_start + filt_width) and aggregates them per shot zone.
	filt_start = 0
	filt_width = 2
	filt_inc = 0.25
	gdf_list = list()
	# 1 + 30 / filt_inc iterations, i.e. 121 passes with filt_inc = 0.25.
	for i in range(1 + int(30 * (1 / filt_inc))):
	filt_df = shots_df[(shots_df["shotDistance"] >= filt_start) & (shots_df["shotDistance"] < filt_start + filt_width)]
	# Per shot zone: sum of "shot_made" and the non-null count of "period".
	gdf = filt_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
	# Turn the zone index back into a column and expose the count as "shot_atts".
	gdf = gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
	gdf = gdf.assign(team="NBA")
	# NOTE(review): this excerpt ends here. The code that advances filt_start
	# (presumably by filt_inc) and stores gdf in gdf_list is not visible - confirm
	# against the full gist.

databyjp / plot_init_shot_dists.py

Created March 17, 2022 12:48

	# Order the teams on the x-axis by their summed "rel_pts", ascending.
	# "rel_pts" is selected before summing so that the other columns of gdf_tot
	# (e.g. the "shot_zone" labels) are not aggregated as well; summing the whole
	# frame is wasted work and can fail on non-numeric columns.
	tm_ranks = gdf_tot.groupby("team")["rel_pts"].sum().sort_values().index.to_list()
	# One bar-chart row per shot zone: bar height is "shot_freq", bar colour is
	# "rel_ev" on a diverging scale whose midpoint is pinned at 0.
	fig = px.bar(
	    gdf_tot, x="team", facet_row="shot_zone", y="shot_freq", color="rel_ev",
	    color_continuous_scale=px.colors.diverging.RdYlBu_r, color_continuous_midpoint=0,
	    category_orders={"team": tm_ranks},
	)
	# Thin dark outline around every bar.
	fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
	fig.show()

databyjp / shot_dist_profile_by_team.py

Last active March 17, 2022 12:33


	# ========================================
	# Now let's do the same for each team
	# ========================================
	# Cast the team ids to int before they are used for per-team filtering below.
	shots_df["teamId"] = shots_df["teamId"].astype(int)
	gdf_list = list()
	for tm_id in shots_df.teamId.unique():
	# Rows belonging to this team only.
	tm_df = shots_df[shots_df.teamId == tm_id]
	# Per shot zone: sum of "shot_made" and the non-null count of "period".
	tm_gdf = tm_df.groupby("shot_zone").agg({"shot_made": "sum", "period": "count"})
	# Turn the zone index back into a column and expose the count as "shot_atts".
	tm_gdf = tm_gdf.reset_index().rename({"period": "shot_atts"}, axis=1)
	# NOTE(review): this excerpt ends here. The rest of the loop body (presumably
	# tagging the team and appending to gdf_list) is not visible - confirm against
	# the full gist.

databyjp / group_shot_dists.py

Created March 17, 2022 12:09

	# League-wide shot profile: one row per shot zone with makes, attempts,
	# the zone's share of all attempts and the accuracy within the zone.
	gdf = (
	    shots_df.groupby("shot_zone")
	    .agg(shot_made=("shot_made", "sum"), shot_atts=("period", "count"))
	    .reset_index()
	    .assign(team="NBA")
	    .assign(shot_freq=lambda zones: zones.shot_atts / zones.shot_atts.sum())
	    .assign(shot_acc=lambda zones: zones.shot_made / zones.shot_atts)
	)

databyjp / Shot dists.py

Created March 17, 2022 12:04


	import logging
	import pandas as pd
	import numpy as np
	import utils
	from nba_api.stats.static import players
	from nba_api.stats.static import teams
	import plotly.express as px

	logger = logging.getLogger(__name__)

databyjp / utils.py

Created February 25, 2022 03:42

Add on-court player columns

	def add_pbp_oncourt_columns(df):
	"""
	Add on-court player columns to the play-by-play dataframe.
	Players based on substitution data and box-score data (for starters)
	:param df: PBP dataframe
	:return:
	"""
	df = df.sort_values(["GAME_ID", "actionNumber"])
	df = df.reset_index(drop=True)

databyjp / utils.py

Created February 25, 2022 01:07

Fetch data with gameID

	def fetch_data_w_gameid(json_dir, gm_id, datatype="boxscore"):
	"""
	Download a datafile based on gameID as downloaded from NBA API & saves to file
	:param json_dir: Directory for saving downloaded JSON
	:param gm_id: NBA game ID
	:param datatype: What data types to download - determines endpoint to use
	:return:
	"""
	from nba_api.stats.endpoints import boxscoreadvancedv2
	from nba_api.live.nba.endpoints import playbyplay

databyjp / tmp_rename_file.py

Created February 22, 2022 08:50

Had to rename a bunch of files

	import os

	srcdir = "dl_data/box_scores/json"
	# Prefix "00" to every JSON file in srcdir that does not already start with it.
	# os.listdir returns a complete list up front, so renaming while looping is safe.
	for fname in os.listdir(srcdir):
	    if not fname.endswith('.json') or fname.startswith('00'):
	        continue
	    os.rename(os.path.join(srcdir, fname), os.path.join(srcdir, '00' + fname))

databyjp / further_annotations_20220215_test_analyse_gamelogs.py

Created February 17, 2022 06:30

	fig.add_shape(type="rect",
	x0=rtg_min, y0=rtg_mid, x1=rtg_mid, y1=rtg_max,
	fillcolor="LightSeaGreen",
	opacity=0.25,
	line_width=0,
	layer="below"
	)
	fig.add_shape(type="rect",
	x0=rtg_mid, y0=rtg_min, x1=rtg_max, y1=rtg_mid,
	fillcolor="LightSteelBlue",

databyjp / updated_plot_20220215_test_analyse_gamelogs.py

Last active February 17, 2022 05:18

	fig = px.scatter(df, x="DEF_RATING", y="OFF_RATING",
	color="legend",
	title=f"Game-by-game performances by the {highlight_tm} - '21-'22",
	color_discrete_sequence=["#eeeeee", color_dict[highlight_tm][0], color_dict[highlight_tm][1]],
	size="inv_days_since_game",
	size_max=18,
	template="plotly_white",
	width=1200, height=800,
	labels={"OFF_RATING": "Offensive Rating (higher is better)",
	"DEF_RATING": "Dffensive Rating (to the left is better)",

JP Hwang databyjp