Skip to content

Instantly share code, notes, and snippets.

# Shade the rectangle spanning x in [rtg_min, rtg_mid] and y in
# [rtg_mid, rtg_max]: translucent fill, no outline, on the "below" layer.
quadrant_bounds = dict(
    x0=rtg_min,
    x1=rtg_mid,
    y0=rtg_mid,
    y1=rtg_max,
)
quadrant_style = dict(
    fillcolor="LightSeaGreen",
    opacity=0.25,
    line_width=0,
    layer="below",
)
fig.add_shape(type="rect", **quadrant_bounds, **quadrant_style)
fig.add_shape(type="rect",
x0=rtg_mid, y0=rtg_min, x1=rtg_max, y1=rtg_mid,
fillcolor="LightSteelBlue",
fig = px.scatter(df, x="DEF_RATING", y="OFF_RATING",
color="legend",
title=f"Game-by-game performances by the {highlight_tm} - '21-'22",
color_discrete_sequence=["#eeeeee", color_dict[highlight_tm][0], color_dict[highlight_tm][1]],
size="inv_days_since_game",
size_max=18,
template="plotly_white",
width=1200, height=800,
labels={"OFF_RATING": "Offensive Rating (higher is better)",
"DEF_RATING": "Dffensive Rating (to the left is better)",
# Games played fewer than this many days ago count as "recent".
thresh_days = 20

# Start with every row labelled as another team, then relabel the rows of the
# highlighted team; the "recent" label is written last so it overrides the
# older-games label where both masks match.
df = df.assign(legend="Other teams")
is_highlight_tm = df["TEAM_NAME"] == highlight_tm
is_recent = df.days_since_game < thresh_days
df.loc[is_highlight_tm, "legend"] = f"{thresh_days}+ days ago"
df.loc[is_recent & is_highlight_tm, "legend"] = f"In the last {thresh_days} days"
fig = px.scatter(df, x="DEF_RATING", y="OFF_RATING",
color="legend",
title=f"Game-by-game performances by the {highlight_tm} - '21-'22",
color_discrete_sequence=["#dddddd", color_dict[highlight_tm][0], color_dict[highlight_tm][1]],
size="inv_days_since_game",
import logging
import pandas as pd
import plotly.express as px
import json
import os
import utils
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Fetch the root logger and set its level to INFO.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
def update_box_scores(box_json_dir="dl_data/box_scores/json"):
"""
Download all box scores from NBA API
:param box_json_dir: Directory to save downloads to
:return:
"""
logger.info("Starting download of game box scores...")
gldf = utils.load_gamelogs()
gldf = gldf.sort_values("gamedate_dt")
gm_ids = gldf["Game_ID"].unique()
def fetch_box_score(json_dir, gm_id):
"""
Download one box score from NBA API
:param json_dir: Directory for saving downloaded JSON
:param gm_id: NBA game ID
:return:
"""
from nba_api.stats.endpoints import boxscoreadvancedv2
dl_path = os.path.join(json_dir, gm_id + ".json")
if os.path.exists(dl_path):
GAME_ID TEAM_ID TEAM_ABBREVIATION TEAM_CITY PLAYER_ID PLAYER_NAME NICKNAME START_POSITION COMMENT MIN ... TM_TOV_PCT EFG_PCT TS_PCT USG_PCT E_USG_PCT E_PACE PACE PACE_PER40 POSS PIE
0 0022100860 1610612760 OKC Oklahoma City 1629647 Darius Bazley Darius F 39:34 ... 0.0 0.529 0.573 0.213 0.215 105.83 104.33 86.94 85 0.115
1 0022100860 1610612760 OKC Oklahoma City 1630598 Aaron Wiggins Aaron F 33:50 ... 0.0 0.200 0.200 0.059 0.064 103.94 107.12 89.27 76 0.025
2 0022100860 1610612760 OKC Oklahoma City 1629676 Isaiah Roby Isaiah C 22:03 ... 0.0 0.500 0.436 0.132 0.132 101.01 97.96 81.63 46 0.020
3 0022100860 1610612760 OKC Oklahoma C
GAME_ID TEAM_ID TEAM_NAME TEAM_ABBREVIATION TEAM_CITY MIN E_OFF_RATING OFF_RATING E_DEF_RATING DEF_RATING ... TM_TOV_PCT EFG_PCT TS_PCT USG_PCT E_USG_PCT E_PACE PACE PACE_PER40 POSS PIE
0 0022100860 1610612760 Thunder OKC Oklahoma City 265:00 114.5 116.5 109.2 110.8 ... 10.1 0.568 0.589 1.0 0.202 101.23 99.62 83.02 109 0.531
1 0022100860 1610612752 Knicks NYK New York 265:00 109.2 110.8 114.5 116.5 ... 12.6 0.486 0.523 1.0 0.197 101.23 99.62 83.02 111 0.469
[2 rows x 29 columns]
# Pick `n_symbols` random symbols from sector `sect` that also have price rows
# in `df`, then compute the variance of their averaged prices.
# The set of symbols present in `df` is built once, instead of calling
# df.symbol.unique() again for every candidate symbol.
available_syms = set(df.symbol.unique())
sector_syms = sp500_df.loc[sp500_df.Sector == sect, "Symbol"]
temp_syms = [s for s in sector_syms if s in available_syms]
# NOTE(review): random.sample raises ValueError when fewer than n_symbols
# candidates remain — confirm every sector has enough symbols in df.
temp_syms = random.sample(temp_syms, n_symbols)
pf1_df = df[df.symbol.isin(temp_syms)]
pf1_avg = get_avg_prices(pf1_df)
pf1_var = pf1_avg.var()
# ========== DETERMINE SIMILARITIES ==========
# Calculate similarities between each stock
r_array = np.zeros([len(symbols), len(symbols)])
p_array = np.zeros([len(symbols), len(symbols)])
for i in range(len(symbols)):
for j in range(len(symbols)):
vals_i = df[df["symbol"] == symbols[i]]['close'].values
vals_j = df[df["symbol"] == symbols[j]]['close'].values
r_ij, p_ij = scipy.stats.pearsonr(vals_i, vals_j)
r_array[i, j] = r_ij