Skip to content

Instantly share code, notes, and snippets.

# Label every row for the scatter legend: rows for other teams keep the
# default label, rows for the highlighted team are split by how long ago the
# game was played relative to thresh_days.
thresh_days = 20
is_highlight_tm = df["TEAM_NAME"] == highlight_tm
is_recent_game = df.days_since_game < thresh_days
df = df.assign(legend="Other teams")
df.loc[is_highlight_tm, "legend"] = f"{thresh_days}+ days ago"
# Applied last so recent games override the older-games label set above.
df.loc[is_recent_game & is_highlight_tm, "legend"] = f"In the last {thresh_days} days"
fig = px.scatter(df, x="DEF_RATING", y="OFF_RATING",
color="legend",
title=f"Game-by-game performances by the {highlight_tm} - '21-'22",
color_discrete_sequence=["#dddddd", color_dict[highlight_tm][0], color_dict[highlight_tm][1]],
size="inv_days_since_game",
import logging
import pandas as pd
import plotly.express as px
import json
import os
import utils
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# NOTE: module-level side effect - the root logger's level is set to INFO as
# soon as this module is imported/executed.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
def update_box_scores(box_json_dir="dl_data/box_scores/json"):
    """
    Download all box scores from NBA API

    The lines visible here log the start of the run, load the game logs via
    ``utils.load_gamelogs()``, sort them by the ``gamedate_dt`` column and
    collect the distinct ``Game_ID`` values.

    NOTE(review): neither ``box_json_dir`` nor ``gm_ids`` is used in the
    visible lines; the per-game download step is presumably further down -
    confirm against the full file.

    :param box_json_dir: Directory to save downloads to
    :return:
    """
    logger.info("Starting download of game box scores...")
    # Game logs come from a project helper; its return is used as a DataFrame below.
    gldf = utils.load_gamelogs()
    # Order rows by the gamedate_dt column before extracting IDs.
    gldf = gldf.sort_values("gamedate_dt")
    # Distinct game IDs, in order of first appearance in the sorted frame.
    gm_ids = gldf["Game_ID"].unique()
def fetch_box_score(json_dir, gm_id):
"""
Download one box score from NBA API
:param json_dir: Directory for saving downloaded JSON
:param gm_id: NBA game ID
:return:
"""
from nba_api.stats.endpoints import boxscoreadvancedv2
dl_path = os.path.join(json_dir, gm_id + ".json")
if os.path.exists(dl_path):
GAME_ID TEAM_ID TEAM_ABBREVIATION TEAM_CITY PLAYER_ID PLAYER_NAME NICKNAME START_POSITION COMMENT MIN ... TM_TOV_PCT EFG_PCT TS_PCT USG_PCT E_USG_PCT E_PACE PACE PACE_PER40 POSS PIE
0 0022100860 1610612760 OKC Oklahoma City 1629647 Darius Bazley Darius F 39:34 ... 0.0 0.529 0.573 0.213 0.215 105.83 104.33 86.94 85 0.115
1 0022100860 1610612760 OKC Oklahoma City 1630598 Aaron Wiggins Aaron F 33:50 ... 0.0 0.200 0.200 0.059 0.064 103.94 107.12 89.27 76 0.025
2 0022100860 1610612760 OKC Oklahoma City 1629676 Isaiah Roby Isaiah C 22:03 ... 0.0 0.500 0.436 0.132 0.132 101.01 97.96 81.63 46 0.020
3 0022100860 1610612760 OKC Oklahoma C
GAME_ID TEAM_ID TEAM_NAME TEAM_ABBREVIATION TEAM_CITY MIN E_OFF_RATING OFF_RATING E_DEF_RATING DEF_RATING ... TM_TOV_PCT EFG_PCT TS_PCT USG_PCT E_USG_PCT E_PACE PACE PACE_PER40 POSS PIE
0 0022100860 1610612760 Thunder OKC Oklahoma City 265:00 114.5 116.5 109.2 110.8 ... 10.1 0.568 0.589 1.0 0.202 101.23 99.62 83.02 109 0.531
1 0022100860 1610612752 Knicks NYK New York 265:00 109.2 110.8 114.5 116.5 ... 12.6 0.486 0.523 1.0 0.197 101.23 99.62 83.02 111 0.469
[2 rows x 29 columns]
# Build a random portfolio of n_symbols tickers from sector `sect`, restricted
# to tickers that actually appear in df, then take the variance of whatever
# get_avg_prices returns for that subset.
# The set of symbols present in df is built once here; previously
# df.symbol.unique() was recomputed for every candidate ticker.
available_syms = set(df.symbol.unique())
temp_syms = [s for s in sp500_df[sp500_df.Sector == sect].Symbol.values if s in available_syms]
# random.sample raises ValueError if fewer than n_symbols candidates remain.
temp_syms = random.sample(temp_syms, n_symbols)
pf1_df = df[df.symbol.isin(temp_syms)]
pf1_avg = get_avg_prices(pf1_df)
pf1_var = pf1_avg.var()
# ========== DETERMINE SIMILARITIES ==========
# Calculate similarities between each stock: Pearson correlation of the
# close-price arrays for every ordered pair of symbols.
# r_array[i, j] holds the correlation coefficient and p_array[i, j] the
# matching p-value for symbols[i] vs symbols[j].
# Extract each symbol's close prices once rather than re-filtering df inside
# the O(n^2) pair loop.
close_by_symbol = [df[df["symbol"] == sym]["close"].values for sym in symbols]
r_array = np.zeros([len(symbols), len(symbols)])
p_array = np.zeros([len(symbols), len(symbols)])
for i, vals_i in enumerate(close_by_symbol):
    for j, vals_j in enumerate(close_by_symbol):
        # pearsonr requires both arrays to have the same length.
        r_ij, p_ij = scipy.stats.pearsonr(vals_i, vals_j)
        r_array[i, j] = r_ij
        # Previously p_ij was computed but never stored, leaving p_array all zeros.
        p_array[i, j] = p_ij
def normalise_price(df):
    """
    Add a ``norm_close`` column holding each close relative to its symbol's first close.

    For every symbol the reference value is the ``close`` on the row with the
    earliest ``date``; if several rows of a symbol share that date, the one
    appearing first in ``df`` is used. ``norm_close`` is ``close`` divided by
    that reference, so the reference row itself is 1.0.

    :param df: DataFrame with ``symbol``, ``date`` and ``close`` columns
    :return: A new DataFrame (the input is not modified) with ``norm_close``
        added as the result of the division
    """
    # A stable sort keeps the original row order among equal dates, so the
    # first row per symbol is the same reference row a per-symbol min-date
    # lookup would select.
    by_date = df.sort_values("date", kind="stable")
    first_rows = by_date.drop_duplicates(subset="symbol", keep="first")
    ref_close = first_rows.set_index("symbol")["close"]
    # Computed in one vectorised step instead of writing floats into an
    # integer placeholder column, which triggers pandas' incompatible-dtype
    # upcast warning and made the resulting dtype depend on the data.
    return df.assign(norm_close=df["close"] / df["symbol"].map(ref_close))
# ========== Get token ==========
# Read the token from a text file outside the project tree, stripping
# surrounding whitespace/newlines (presumably an IEX API token - confirm).
with open("../../tokens/iex_token.txt", "r") as f:
    iex_tkn = f.read().strip()
# ========== Get symbols to use ==========
symbol_list = ["MSFT", "AAPL", "NVDA", "JNJ", "KHC", "ALL"]
# ========== Set date range and result container ==========
# NOTE(review): '3m' presumably means a three-month window - confirm against
# the API call that consumes it.
date_range = '3m'
# Starts empty; nothing is stored in it in the visible lines.
symbol_dict = dict()