Skip to content

Instantly share code, notes, and snippets.

@quantra-go-algo
Created May 3, 2026 20:45
Show Gist options
  • Select an option

  • Save quantra-go-algo/22c05149890e7ad0c411234817c48e05 to your computer and use it in GitHub Desktop.

Select an option

Save quantra-go-algo/22c05149890e7ad0c411234817c48e05 to your computer and use it in GitHub Desktop.
def fit_kmeans_pre_oos(
    df_feat: pd.DataFrame,
    *,
    n_clusters: int = 5,
) -> tuple[KMeans, np.ndarray, np.ndarray, dict[int, str]]:
    """
    Fit KMeans using ONLY data BEFORE OOS_START (prevents training leakage).

    For every date yielded by ``label_dates(df_feat.index)`` that falls before
    ``OOS_START``, the ``LOOKBACK_DAYS`` rows preceding that date are
    summarised with ``window_summary`` into a 5-element feature vector
    (mean_ret, ann_vol, trend_score, atr_norm, max_dd). The vectors are
    z-scored and clustered.

    Args:
        df_feat: Feature frame whose index contains the label dates.
        n_clusters: Number of KMeans clusters (keyword-only, default 5).

    Returns:
        ``(km, mu, sd, c2r)``: the fitted model, the per-feature mean and
        standard deviation used for z-scoring, and a mapping from cluster id
        to regime name.

    Raises:
        ValueError: If fewer than ``n_clusters`` usable windows exist before
            ``OOS_START``.
    """
    feature_keys = ("mean_ret", "ann_vol", "trend_score", "atr_norm", "max_dd")
    split = pd.to_datetime(OOS_START)

    rows = []
    for t in label_dates(df_feat.index):
        # Stopping at the first out-of-sample date relies on label_dates
        # yielding dates in ascending order.
        if t >= split:
            break
        i = df_feat.index.get_loc(t)
        # Without a full lookback window the slice start would go negative,
        # and iloc would silently count it from the END of the frame.
        if i < LOOKBACK_DAYS:
            continue
        s = window_summary(df_feat.iloc[i - LOOKBACK_DAYS : i])
        v = np.array([s[k] for k in feature_keys], dtype=float)
        if np.isnan(v).any():
            continue
        rows.append(v)

    if len(rows) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} complete pre-OOS windows to fit "
            f"KMeans, got {len(rows)}."
        )

    X = np.vstack(rows)
    mu = X.mean(axis=0)
    sd = X.std(axis=0) + 1e-12  # epsilon guards against zero-variance features
    Xn = (X - mu) / sd
    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=42).fit(Xn)

    # Map clusters to regime names (simple heuristic) by ranking the cluster
    # centres along the trend_score and ann_vol feature axes.
    centers = km.cluster_centers_
    trend_dim = feature_keys.index("trend_score")
    vol_dim = feature_keys.index("ann_vol")
    trend_rank = np.argsort(centers[:, trend_dim])
    vol_rank = np.argsort(centers[:, vol_dim])

    c2r = {c: "RANGE" for c in range(n_clusters)}
    # Assignment order matters: a cluster that is extreme on both axes keeps
    # the LAST label written, so the volatility labels take precedence.
    c2r[int(trend_rank[-1])] = "TREND_UP"
    c2r[int(trend_rank[0])] = "TREND_DOWN"
    c2r[int(vol_rank[-1])] = "HIGH_VOL"
    c2r[int(vol_rank[0])] = "LOW_VOL"
    return km, mu, sd, c2r
def label_regimes_kmeans(df_feat: pd.DataFrame) -> pd.Series:
    """
    Label every row of ``df_feat`` with a KMeans-derived regime name.

    The model, scaling statistics and cluster-to-regime mapping come from
    ``fit_kmeans_pre_oos``. For each date yielded by
    ``label_dates(df_feat.index)``, the ``LOOKBACK_DAYS`` rows preceding that
    date are summarised with ``window_summary``, z-scored and assigned to a
    cluster. The resulting sparse labels are forward-filled over the full
    index; rows before the first label become ``"UNCERTAIN"``.

    Args:
        df_feat: Feature frame whose index contains the label dates.

    Returns:
        A Series of regime names aligned to ``df_feat.index``.
    """
    km, mu, sd, c2r = fit_kmeans_pre_oos(df_feat)
    feature_keys = ("mean_ret", "ann_vol", "trend_score", "atr_norm", "max_dd")

    dates = []
    vectors = []
    for t in label_dates(df_feat.index):
        i = df_feat.index.get_loc(t)
        # Without a full lookback window the slice start would go negative,
        # and iloc would silently count it from the END of the frame.
        if i < LOOKBACK_DAYS:
            continue
        s = window_summary(df_feat.iloc[i - LOOKBACK_DAYS : i])
        v = np.array([s[k] for k in feature_keys], dtype=float)
        if np.isnan(v).any():
            continue
        dates.append(t)
        vectors.append(v)

    if not dates:
        # No usable window at all: every row is unlabeled.
        return pd.Series("UNCERTAIN", index=df_feat.index, dtype=object)

    # One batched predict call instead of one call per date.
    clusters = km.predict((np.vstack(vectors) - mu) / sd)

    # Going through a dict keeps only the last label for a repeated date.
    labels = {t: c2r.get(int(cl), "UNCERTAIN") for t, cl in zip(dates, clusters)}
    sparse = pd.Series(labels, dtype=object).sort_index()
    return sparse.reindex(df_feat.index).ffill().fillna("UNCERTAIN")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment