Created
May 3, 2026 20:47
-
-
Save quantra-go-algo/97f1dfe735d8702670af47a36e57be79 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_cache() -> dict:
    """Load the persisted LLM label cache from ``LLM_CACHE_FILE``.

    Returns:
        The JSON object stored in the cache file, or an empty dict when the
        file does not exist, cannot be decoded, or does not hold a JSON object.
        The last two cases emit a ``RuntimeWarning`` so the problem is visible.
    """
    import warnings

    cache_path = Path(LLM_CACHE_FILE)
    try:
        # EAFP: read directly instead of exists()-then-read, which can race
        # with another process removing or replacing the file.
        cached = json.loads(cache_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No cache yet (e.g. first run): start empty.
        return {}
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. A cache is regenerable, so a damaged file should not
        # abort the run.
        warnings.warn(
            f"Ignoring unreadable LLM cache {cache_path}: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
        return {}
    if not isinstance(cached, dict):
        # Callers use the result as a mapping (membership test and item
        # assignment), so anything other than a JSON object is unusable.
        warnings.warn(
            f"Ignoring LLM cache {cache_path}: expected a JSON object, "
            f"got {type(cached).__name__}",
            RuntimeWarning,
            stacklevel=2,
        )
        return {}
    return cached
def save_cache(cache: dict) -> None:
    """Persist ``cache`` to ``LLM_CACHE_FILE`` as indented, key-sorted JSON.

    The JSON is written to a sibling ``<name>.tmp`` file first and then moved
    over the target, so an interruption in the middle of a write cannot leave
    a truncated cache file behind.

    Args:
        cache: JSON-serializable mapping to store.
    """
    target = Path(LLM_CACHE_FILE)
    serialized = json.dumps(cache, indent=2, sort_keys=True)
    scratch = target.with_name(target.name + ".tmp")
    scratch.write_text(serialized, encoding="utf-8")
    # Path.replace overwrites an existing target in a single rename step.
    scratch.replace(target)
def parse_regime(text: str) -> str:
    """Extract a regime label from the model's reply.

    DeepSeek should return JSON; the reply is parsed as JSON first, then the
    outermost ``{...}`` span inside it is tried (this covers replies wrapped in
    Markdown code fences or surrounded by extra prose), and finally a regex
    scan of the whole reply is used as a fallback.

    Args:
        text: Raw reply content from the model.

    Returns:
        The upper-cased ``"regime"`` value when it is a member of
        ``REGIME_SET``; otherwise the first known label found anywhere in the
        reply; otherwise ``"UNCERTAIN"``.
    """
    reply = text.strip()

    # Try the reply as-is, then the embedded JSON object if there is one.
    # Without the second attempt, a fenced reply would go straight to the
    # regex scan, which can pick up a label mentioned in the "reason" text
    # instead of the actual "regime" value.
    candidates = [reply]
    start, end = reply.find("{"), reply.rfind("}")
    if 0 <= start < end:
        embedded = reply[start : end + 1]
        if embedded != reply:
            candidates.append(embedded)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError: not valid JSON.
            continue
        if isinstance(parsed, dict):
            label = str(parsed.get("regime", "")).upper().strip()
            if label in REGIME_SET:
                return label

    # Fallback: first known label appearing anywhere in the reply.
    m = re.search(r"\b(TREND_UP|TREND_DOWN|RANGE|HIGH_VOL|LOW_VOL|UNCERTAIN)\b", reply.upper())
    if m:
        return m.group(1)
    return "UNCERTAIN"
def deepseek_label(summary: dict) -> str:
    """Request a regime label for ``summary`` from the chat-completions endpoint.

    Builds a system prompt plus a JSON-encoded user message (the summary, the
    allowed labels and a per-label hint), POSTs them to
    ``DEEPSEEK_BASE_URL`` + ``/chat/completions`` and passes the first
    choice's message content through ``parse_regime``.

    Args:
        summary: Numeric window summary; it is serialized with ``json.dumps``.

    Returns:
        The label produced by ``parse_regime`` for the model's reply.

    Raises:
        Whatever ``requests.post`` / ``raise_for_status`` raise on transport
        failures or HTTP error statuses; lookup errors if the response body
        lacks the expected ``choices[0].message.content`` path.
    """
    endpoint = DEEPSEEK_BASE_URL.rstrip("/") + "/chat/completions"
    request_headers = {"Authorization": f"Bearer {DEEPSEEK_API_KEY}", "Content-Type": "application/json"}

    system_prompt = (
        "You are a quantitative researcher.\n"
        "Label the market regime using ONLY the numeric summary provided.\n"
        "Do not use outside knowledge.\n"
        "Return STRICT JSON only: {\"regime\": <one label>, \"reason\": <short>}."
    )

    # Short rule-of-thumb description per label, sent alongside the summary.
    label_hints = {
        "TREND_UP": "trend_score > 0 and not high vol",
        "TREND_DOWN": "trend_score < 0 and not high vol",
        "RANGE": "|trend_score| small; z_last suggests mean reversion",
        "HIGH_VOL": "ann_vol high or atr_norm high or max_dd very negative",
        "LOW_VOL": "ann_vol low and atr_norm low",
        "UNCERTAIN": "if ambiguous",
    }
    user_prompt = json.dumps(
        {
            "summary": summary,
            "allowed_labels": REGIME_SET,
            "hint": label_hints,
        }
    )

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    request_body = {
        "model": DEEPSEEK_MODEL,
        "messages": conversation,
        "temperature": 0.0,
        "max_tokens": 120,
        "stream": False,
    }

    response = requests.post(
        endpoint,
        headers=request_headers,
        json=request_body,
        timeout=LLM_TIMEOUT_S,
    )
    response.raise_for_status()

    first_choice = response.json()["choices"][0]
    return parse_regime(first_choice["message"]["content"])
def label_regimes_llm(df_feat: pd.DataFrame) -> pd.Series:
    """
    Label regimes with DeepSeek for the full history (but only at label dates).

    For each date returned by ``label_dates(df_feat.index)`` the preceding
    ``LOOKBACK_DAYS`` rows are summarized with ``window_summary`` and sent to
    ``deepseek_label``. Labels are forward-filled across days; days before the
    first label become "UNCERTAIN".
    Cached to JSON (keyed by date, symbol, lookback and model) so re-runs are
    cheap; the cache is saved after every new label so an aborted run keeps
    the labels it already obtained.

    Label dates with fewer than ``LOOKBACK_DAYS`` rows of history are labelled
    "UNCERTAIN" without calling the LLM and without touching the cache.

    Args:
        df_feat: Feature frame whose index contains the label dates.

    Returns:
        A Series of regime labels aligned to ``df_feat.index``.
    """
    cache = load_cache()
    labels = {}
    for t in label_dates(df_feat.index):
        i = df_feat.index.get_loc(t)
        if i < LOOKBACK_DAYS:
            # i - LOOKBACK_DAYS would be negative, and a negative iloc slice
            # start counts from the END of the frame, so the "window" would be
            # empty or made of the wrong rows. Do not spend an LLM call on it.
            labels[t] = "UNCERTAIN"
            continue

        cache_key = f"{t.date().isoformat()}::{SYMBOL}::{LOOKBACK_DAYS}::{DEEPSEEK_MODEL}"
        if cache_key in cache:
            labels[t] = cache[cache_key]
            continue

        # Only build the window summary on a cache miss; it is unused otherwise.
        w = df_feat.iloc[i - LOOKBACK_DAYS : i]
        reg = deepseek_label(window_summary(w))
        if reg not in REGIME_SET:
            reg = "UNCERTAIN"
        cache[cache_key] = reg
        save_cache(cache)
        labels[t] = reg
        time.sleep(LLM_RATE_LIMIT_S)

    # Explicit object dtype keeps the empty-labels case a string-capable Series.
    sparse = pd.Series(labels, dtype=object).sort_index()
    return sparse.reindex(df_feat.index).ffill().fillna("UNCERTAIN")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment