Skip to content

Instantly share code, notes, and snippets.

@quantra-go-algo
Created May 3, 2026 20:47
Show Gist options
  • Select an option

  • Save quantra-go-algo/97f1dfe735d8702670af47a36e57be79 to your computer and use it in GitHub Desktop.

Select an option

Save quantra-go-algo/97f1dfe735d8702670af47a36e57be79 to your computer and use it in GitHub Desktop.
def load_cache() -> dict:
    """Read the on-disk LLM label cache.

    Returns:
        The JSON-decoded contents of ``LLM_CACHE_FILE``, or an empty dict
        when that file does not exist.
    """
    cache_path = Path(LLM_CACHE_FILE)
    if not cache_path.exists():
        # Nothing cached yet.
        return {}
    raw = cache_path.read_text(encoding="utf-8")
    return json.loads(raw)
def save_cache(cache: dict) -> None:
    """Persist the LLM label cache to ``LLM_CACHE_FILE`` as indented, key-sorted JSON.

    The JSON text is written to a sibling ``.tmp`` file first and then moved
    over the real file, so an interruption in the middle of a write cannot
    leave a truncated, unparseable cache behind.

    Args:
        cache: JSON-serialisable mapping to store.

    Raises:
        TypeError: If ``cache`` contains values ``json.dumps`` cannot encode
            (raised before anything is written).
        OSError: If the temporary file cannot be written or moved into place.
    """
    target = Path(LLM_CACHE_FILE)
    # Serialise before touching the disk so a bad value never clobbers the file.
    payload = json.dumps(cache, indent=2, sort_keys=True)
    tmp_path = target.with_name(target.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    # Same directory => same filesystem, so this is a single rename that
    # overwrites any existing cache file.
    tmp_path.replace(target)
def parse_regime(text: str) -> str:
    """Extract a regime label from the model's reply text.

    DeepSeek should return JSON; we also try a regex fallback.

    Resolution order:
      1. Parse the whole reply as JSON and read its ``"regime"`` field.
      2. Parse the outermost ``{...}`` span as JSON (covers replies wrapped
         in markdown code fences or surrounded by extra prose).
      3. Take the first known label found anywhere in the upper-cased reply.

    Args:
        text: Raw reply content. Anything that is not a ``str`` (for example
            ``None``) is treated as an empty reply.

    Returns:
        The label found, or ``"UNCERTAIN"`` when none can be extracted.
    """
    t = text.strip() if isinstance(text, str) else ""

    # Whole reply first, then the outermost brace-delimited span if different.
    candidates = [t]
    start, end = t.find("{"), t.rfind("}")
    if 0 <= start < end:
        inner = t[start : end + 1]
        if inner != t:
            candidates.append(inner)

    for candidate in candidates:
        try:
            obj = json.loads(candidate)
        except (ValueError, RecursionError):
            # Not valid JSON (JSONDecodeError is a ValueError) or absurdly
            # nested; fall through to the next candidate / the regex.
            continue
        if isinstance(obj, dict):
            r = str(obj.get("regime", "")).upper().strip()
            if r in REGIME_SET:
                return r

    m = re.search(r"\b(TREND_UP|TREND_DOWN|RANGE|HIGH_VOL|LOW_VOL|UNCERTAIN)\b", t.upper())
    if m:
        return m.group(1)
    return "UNCERTAIN"
def deepseek_label(summary: dict) -> str:
    """Ask the DeepSeek chat-completions endpoint for a regime label.

    Posts ``summary`` together with the allowed labels and a per-label hint
    table, then hands the reply text to ``parse_regime``.

    Args:
        summary: Window summary to classify; it is embedded in the user
            message via ``json.dumps``, so it must be JSON-serialisable.

    Returns:
        Whatever ``parse_regime`` extracts from the first choice's message
        content.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (via ``raise_for_status``).
    """
    endpoint = f"{DEEPSEEK_BASE_URL.rstrip('/')}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        "Content-Type": "application/json",
    }

    # System prompt: one instruction per line.
    system_prompt = "\n".join(
        [
            "You are a quantitative researcher.",
            "Label the market regime using ONLY the numeric summary provided.",
            "Do not use outside knowledge.",
            'Return STRICT JSON only: {"regime": <one label>, "reason": <short>}.',
        ]
    )

    label_hints = {
        "TREND_UP": "trend_score > 0 and not high vol",
        "TREND_DOWN": "trend_score < 0 and not high vol",
        "RANGE": "|trend_score| small; z_last suggests mean reversion",
        "HIGH_VOL": "ann_vol high or atr_norm high or max_dd very negative",
        "LOW_VOL": "ann_vol low and atr_norm low",
        "UNCERTAIN": "if ambiguous",
    }
    user_message = {
        "summary": summary,
        "allowed_labels": REGIME_SET,
        "hint": label_hints,
    }

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(user_message)},
    ]
    request_body = {
        "model": DEEPSEEK_MODEL,
        "messages": messages,
        "temperature": 0.0,
        "max_tokens": 120,
        "stream": False,
    }

    response = requests.post(
        endpoint,
        headers=request_headers,
        json=request_body,
        timeout=LLM_TIMEOUT_S,
    )
    response.raise_for_status()
    reply = response.json()
    return parse_regime(reply["choices"][0]["message"]["content"])
def label_regimes_llm(df_feat: pd.DataFrame) -> pd.Series:
    """
    Label regimes with DeepSeek for the full history (but only at label dates).

    For every date yielded by ``label_dates(df_feat.index)`` the label is read
    from the JSON cache when present. On a cache miss the ``LOOKBACK_DAYS``
    rows preceding that date are summarised with ``window_summary``, sent to
    ``deepseek_label``, and the result is written to the cache immediately so
    an interrupted run can resume. The window and its summary are only built
    on a cache miss, which keeps fully cached re-runs cheap.

    Args:
        df_feat: Feature frame; its index supplies the label dates and the
            index of the returned series.

    Returns:
        A series aligned to ``df_feat.index`` with labels forward-filled
        across days; rows still unlabeled after forward-filling are set to
        ``"UNCERTAIN"``.
    """
    cache = load_cache()
    labels = {}
    for t in label_dates(df_feat.index):
        cache_key = f"{t.date().isoformat()}::{SYMBOL}::{LOOKBACK_DAYS}::{DEEPSEEK_MODEL}"
        if cache_key in cache:
            labels[t] = cache[cache_key]
            continue

        # Cache miss: only now pay for locating and summarising the window.
        i = df_feat.index.get_loc(t)
        # The window ends just before row i, so the labelled day itself is excluded.
        # NOTE(review): if i < LOOKBACK_DAYS the slice start is negative and
        # counts from the end of the frame; presumably label_dates never
        # yields such early dates -- confirm.
        w = df_feat.iloc[i - LOOKBACK_DAYS : i]
        reg = deepseek_label(window_summary(w))
        if reg not in REGIME_SET:
            reg = "UNCERTAIN"

        cache[cache_key] = reg
        # Save after every new label so progress survives a crash mid-run.
        save_cache(cache)
        labels[t] = reg
        time.sleep(LLM_RATE_LIMIT_S)

    sparse = pd.Series(labels).sort_index()
    return sparse.reindex(df_feat.index).ffill().fillna("UNCERTAIN")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment