litellm ❯ python scripts/sanity/chutes_experimental_json_sanity_curl.py --execute --model moonshotai/Kimi-K2-Thinking --verbose-json
======================== echo_true | FAIL (no_message_content) ========================
{"id":"d1a7725469af49babb48b30c03a51d5a","object":"chat.completion","created":1762907127,"model":"moonshotai/Kimi-K2-Thinking","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"The user wants me to return only `{\"ok\": true}` as JSON","tool_calls":null},"logprobs":null,"finish_reason":"length","matched_stop":null}],"usage":{"prompt_tokens":34,"total_tokens":50,"completion_tokens":16,"prompt_tokens_details":{"cached_tokens":4},"reasoning_tokens":0},"metadata":{"weight_version":"default"},"chutes_verification":"5080c3baf77c2f77d10e168b88a60043"}
================================================
======================== sum_chain | FAIL (no_message_content) ========================
{"id":"281a0d3deaa24cf78ca2c863ac31f7e4","object":"chat.completion","created":1762907130,"model":"moonshotai/Kimi-K2-Thinking","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"The user wants me to compute 17 + 28 + 13 and respond with a JSON object in a specific format.\n\nLet me calculate the sum:\n17 + 28 = 45\n45 + 13 = 58","tool_calls":null},"logprobs":null,"finish_reason":"length","matched_stop":null}],"usage":{"prompt_tokens":60,"total_tokens":108,"completion_tokens":48,"prompt_tokens_details":{"cached_tokens":20},"reasoning_tokens":0},"metadata":{"weight_version":"default"},"chutes_verification":"d1107cefa7a45d077a698a7b88ecca41"}
================================================
======================== country_snapshot | FAIL (no_message_content) ========================
{"id":"624e04d123864950a413f9ded4c4d242","object":"chat.completion","created":1762907132,"model":"moonshotai/Kimi-K2-Thinking","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"The user wants me to respond with JSON containing information about France.\nThe required keys are:\n- country\n- capital\n- continent\n\nThe example shape is","tool_calls":null},"logprobs":null,"finish_reason":"length","matched_stop":null}],"usage":{"prompt_tokens":56,"total_tokens":88,"completion_tokens":32,"prompt_tokens_details":{"cached_tokens":4},"reasoning_tokens":0},"metadata":{"weight_version":"default"},"chutes_verification":"8855b161cb29561f0dcad39488e6fd41"}
================================================
======================== migration_plan | FAIL (no_message_content) ========================
{"id":"996b139a51794dceaa565e9de54cf467","object":"chat.completion","created":1762907135,"model":"moonshotai/Kimi-K2-Thinking","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"The user wants a three-step plan for migrating a REST API to Chutes. I need to respond strictly in JSON format with a specific schema:\n- \"steps\": an array of objects, each with \"id\" (number), \"task\" (string), and \"owner\" (string)\n- \"confidence\": a string that can be \"high\", \"medium\", or \"low\"\n\nFirst, I need to understand what \"Chutes\" refers to in this context. Chutes is a platform for building and deploying APIs with a focus on serverless functions and edge computing. It's designed to make API development faster and more scalable.\n\nLet me create a practical three-step migration plan:\n\nStep 1: Assessment and Preparation\n- This would involve analyzing the current REST API, identifying endpoints, dependencies","tool_calls":null},"logprobs":null,"finish_reason":"length","matched_stop":null}],"usage":{"prompt_tokens":70,"total_tokens":230,"completion_tokens":160,"prompt_tokens_details":{"cached_tokens":20},"reasoning_tokens":0},"metadata":{"weight_version":"default"},"chutes_verification":"1ed6fed6cf1a552cb087311b082259ee"}
================================================
======================== decision_matrix | FAIL (no_message_content) ========================
{"id":"005785bb77694edf960fa10c26836d99","object":"chat.completion","created":1762907139,"model":"moonshotai/Kimi-K2-Thinking","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"The user wants me to compare two deployment strategies: 'low_latency' and 'high_accuracy'. I need to provide a JSON response with:\n1. A list of scores for each option, each containing:\n - The option name\n - A score between 0 and 1\n - A justification string\n2. A winner field indicating which option is better overall\n\nLet me think about the characteristics of each strategy:\n\n**Low Latency Strategy:**\n- Focuses on minimizing response time\n- Good for real-time applications (gaming, live streaming, financial trading, autonomous vehicles)\n- May sacrifice some accuracy for speed\n- Uses techniques like model quantization, pruning, distillation, edge deployment\n- Prioritizes user experience where immediate feedback is critical\n- Trade-offs: potentially lower accuracy, simpler models\n\n**High Accuracy Strategy:**\n- Focuses on maximizing prediction correctness\n- Good for applications where errors are costly (medical diagnosis, fraud detection, scientific research)\n- May sacrifice","tool_calls":null},"logprobs":null,"finish_reason":"length","matched_stop":null}],"usage":{"prompt_tokens":90,"total_tokens":290,"completion_tokens":200,"prompt_tokens_details":{"cached_tokens":20},"reasoning_tokens":0},"metadata":{"weight_version":"default"},"chutes_verification":"0896bdc89bf86de8126d9bc5d00c960c"}
================================================
RESULT FAIL 0/5 model=moonshotai/Kimi-K2-Thinking elapsed_s=13.936 reasons=no_message_content×5
Last active
November 12, 2025 13:37
-
-
Save grahama1970/95c747c461c11ffb0993fb648e111c15 to your computer and use it in GitHub Desktop.
Repro for Kimi-K2-Thinking on Chutes: JSON returns choices[0].message.content=null while reasoning_content contains the payload. Happens with response_format=json_object and json_schema; persists with larger max_tokens. Req id included for log lookup.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal repro for the Kimi-K2-Thinking issue on Chutes: request strict JSON
# via response_format=json_object; on the affected deployment the assistant
# text lands in reasoning_content while .choices[0].message.content is null.
curl -sS -L "$CHUTES_API_BASE/chat/completions" \
  -H "Authorization: Bearer $CHUTES_API_KEY" -H "Content-Type: application/json" \
  -d '{
  "model":"moonshotai/Kimi-K2-Thinking",
  "messages":[
    {"role":"system","content":"Respond strictly with valid JSON."},
    {"role":"user","content":"Return only {\"ok\": true} as JSON."}
  ],
  "response_format":{"type":"json_object"},
  "max_tokens":64,
  "temperature":0
  }' | jq '.choices[0].message'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """Lightweight curl-based variant of the Chutes experimental JSON sanity probe. | |
| This script mirrors the scenarios from chutes_experimental_json_sanity.py but | |
| uses the system `curl` binary for every request so that developers can inspect | |
| and replay the exact HTTP traffic without going through the SciLLM client. | |
| python scripts/sanity/chutes_experimental_json_sanity_curl.py --execute --model moonshotai/Kimi-K2-Thinking | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| import shlex | |
| import shutil | |
| import subprocess | |
| import sys | |
| import time | |
| from typing import Any, Dict, List, Tuple | |
| from dotenv import find_dotenv, load_dotenv | |
| STATUS_MARKER = "__CURL_HTTP_STATUS__" | |
def _scenario_definitions(system_prompt: str) -> List[Dict[str, Any]]:
    """Return the five JSON-sanity probe scenarios.

    Each entry carries the chat messages, a ``response_format`` (mixing the
    ``json_object`` and ``json_schema`` modes so both structured-output paths
    get exercised), and per-scenario ``max_tokens``/``temperature``. All
    scenarios share *system_prompt* as the system message.
    """
    return [
        # Smallest possible probe: model only has to echo {"ok": true}.
        {
            "scenario": "echo_true",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "Return only {\"ok\": true} as JSON."},
            ],
            "response_format": {"type": "json_object"},
            "max_tokens": 16,
            "temperature": 0,
        },
        # Simple arithmetic answered under a json_schema response_format.
        {
            "scenario": "sum_chain",
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Compute 17 + 28 + 13. Respond strictly with a JSON object "
                        '{"problem":"17+28+13","answer":58,"explanation":<brief string>}'
                    ),
                },
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "ArithmeticAnswer",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "problem": {"type": "string"},
                            "answer": {"type": "integer"},
                            "explanation": {"type": "string"},
                        },
                        "required": ["problem", "answer"],
                    },
                },
            },
            "max_tokens": 48,
            "temperature": 0,
        },
        # Factual lookup with three required string keys.
        {
            "scenario": "country_snapshot",
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Respond with JSON containing the keys country, capital, and continent for France. "
                        'Example shape: {"country":"France","capital":"Paris","continent":"Europe"}.'
                    ),
                },
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "CountrySnapshot",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "country": {"type": "string"},
                            "capital": {"type": "string"},
                            "continent": {"type": "string"},
                        },
                        "required": ["country", "capital", "continent"],
                    },
                },
            },
            "max_tokens": 32,
            "temperature": 0,
        },
        # Nested array-of-objects output; slight temperature for variety.
        {
            "scenario": "migration_plan",
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Create a three step plan for migrating a REST API to Chutes. "
                        'Respond strictly as {"steps":[{"id":1,"task":<string>,"owner":<string>}...],"confidence":<high|medium|low>}'
                    ),
                },
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "MigrationPlan",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "steps": {
                                "type": "array",
                                "minItems": 3,
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "id": {"type": "integer"},
                                        "task": {"type": "string"},
                                        "owner": {"type": "string"},
                                    },
                                    "required": ["id", "task", "owner"],
                                },
                            },
                            "confidence": {"type": "string"},
                        },
                        "required": ["steps"],
                    },
                },
            },
            "max_tokens": 160,
            "temperature": 0.1,
        },
        # Comparative scoring with a constrained winner field.
        {
            "scenario": "decision_matrix",
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Compare the deployment strategies 'low_latency' and 'high_accuracy'. Respond as "
                        '{"scores":[{"option":"low_latency","score":<0-1>,"justification":<string>},'
                        '{"option":"high_accuracy","score":<0-1>,"justification":<string>}],"winner":<string from options>}.'
                    ),
                },
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "DecisionMatrix",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "scores": {
                                "type": "array",
                                "minItems": 2,
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "option": {"type": "string"},
                                        "score": {"type": "number"},
                                        "justification": {"type": "string"},
                                    },
                                    "required": ["option", "score", "justification"],
                                },
                            },
                            "winner": {"type": "string"},
                        },
                        "required": ["scores", "winner"],
                    },
                },
            },
            "max_tokens": 200,
            "temperature": 0.2,
        },
    ]
| def _validate_payload(scenario: str, payload: Any) -> Tuple[bool, str | None]: | |
| if not isinstance(payload, dict): | |
| return False, "payload_not_dict" | |
| if scenario == "echo_true": | |
| return (payload.get("ok") is True, None if payload.get("ok") is True else "missing_ok_true") | |
| if scenario == "sum_chain": | |
| prob = payload.get("problem") | |
| ans = payload.get("answer") | |
| if prob != "17+28+13" or ans != 58: | |
| return False, f"mismatch:problem={prob},answer={ans}" | |
| return True, None | |
| if scenario == "country_snapshot": | |
| ctry = str(payload.get("country") or "").lower() | |
| capital = str(payload.get("capital") or "").lower() | |
| continent = str(payload.get("continent") or "").lower() | |
| ok = ctry == "france" and capital == "paris" and continent == "europe" | |
| return (ok, None if ok else "country_snapshot_mismatch") | |
| if scenario == "migration_plan": | |
| steps = payload.get("steps") | |
| if not isinstance(steps, list) or len(steps) < 3: | |
| return False, "missing_steps" | |
| for step in steps: | |
| if not isinstance(step, dict): | |
| return False, "invalid_step" | |
| if not isinstance(step.get("task"), str) or not step.get("task"): | |
| return False, "empty_task" | |
| if not isinstance(step.get("owner"), str) or not step.get("owner"): | |
| return False, "empty_owner" | |
| return True, None | |
| if scenario == "decision_matrix": | |
| scores = payload.get("scores") | |
| if not isinstance(scores, list) or len(scores) < 2: | |
| return False, "missing_scores" | |
| opts = {"low_latency", "high_accuracy"} | |
| seen = set() | |
| for entry in scores: | |
| if not isinstance(entry, dict): | |
| return False, "invalid_score_entry" | |
| opt = entry.get("option") | |
| if opt not in opts: | |
| return False, "unknown_option" | |
| if not isinstance(entry.get("justification"), str) or not entry.get("justification"): | |
| return False, "missing_justification" | |
| seen.add(opt) | |
| winner = payload.get("winner") | |
| if winner not in seen: | |
| return False, "invalid_winner" | |
| return True, None | |
| return False, "unknown_scenario" | |
| def _env_float(name: str, default: float) -> float: | |
| raw = os.getenv(name) | |
| if raw is None: | |
| return default | |
| try: | |
| return float(raw) | |
| except ValueError: | |
| return default | |
| def _env_int(name: str, default: int) -> int: | |
| raw = os.getenv(name) | |
| if raw is None: | |
| return default | |
| try: | |
| return int(raw) | |
| except ValueError: | |
| return default | |
| def _repair_json_string(raw: str) -> str | None: | |
| text = raw.strip() | |
| lowered = text.lower() | |
| if lowered.startswith("```json"): | |
| text = text[text.find("\n") + 1 :] | |
| if text.startswith("```"): | |
| text = text[3:] | |
| if text.endswith("```"): | |
| text = text[:-3] | |
| text = text.strip() | |
| start = text.find("{") | |
| end = text.rfind("}") | |
| if start == -1 or end == -1 or end <= start: | |
| return None | |
| candidate = text[start : end + 1] | |
| try: | |
| json.loads(candidate) | |
| except json.JSONDecodeError: | |
| return None | |
| return candidate | |
| def _extract_message_and_json(raw_text: str) -> Tuple[str | None, str | None, Any]: | |
| try: | |
| parsed = json.loads(raw_text) | |
| except json.JSONDecodeError: | |
| return None, "response_not_json", None | |
| if isinstance(parsed, dict): | |
| error_obj = parsed.get("error") | |
| if error_obj: | |
| if isinstance(error_obj, dict): | |
| message = error_obj.get("message") or error_obj.get("type") | |
| else: | |
| message = str(error_obj) | |
| return None, message or "chutes_error", parsed | |
| choices = parsed.get("choices") | |
| if isinstance(choices, list) and choices: | |
| choice = choices[0] or {} | |
| message = choice.get("message") or {} | |
| content = message.get("content") | |
| if isinstance(content, list): | |
| combined = "".join(part.get("text", "") if isinstance(part, dict) else str(part) for part in content) | |
| content = combined | |
| if isinstance(content, str): | |
| return content, None, parsed | |
| output = parsed.get("output") if isinstance(parsed, dict) else None | |
| if isinstance(output, dict): | |
| text = output.get("text") | |
| if isinstance(text, str): | |
| return text, None, parsed | |
| return None, "no_message_content", parsed | |
def _build_curl_command(endpoint: str, api_key: str, payload: Dict[str, Any], timeout: float, headers: List[str]) -> Tuple[List[str], str]:
    """Assemble the argv for a curl POST plus a shell-quoted rendering of it.

    The --write-out template appends STATUS_MARKER plus the HTTP status on a
    final line so _invoke_curl can split status from body.
    """
    body = json.dumps(payload, ensure_ascii=False)
    argv: List[str] = ["curl", "--silent", "--show-error", "--location"]
    argv += ["--max-time", str(max(timeout, 1e-3))]
    argv += ["--header", "Content-Type: application/json"]
    argv += ["--header", f"Authorization: Bearer {api_key}"]
    for extra in headers:
        argv += ["--header", extra]
    argv += [
        "--request",
        "POST",
        "--data-binary",
        body,
        "--url",
        endpoint,
        "--write-out",
        f"\n{STATUS_MARKER}%{{http_code}}",
    ]
    rendered = " ".join(shlex.quote(token) for token in argv)
    return argv, rendered
def _invoke_curl(endpoint: str, api_key: str, payload: Dict[str, Any], timeout: float, headers: List[str]) -> Dict[str, Any]:
    """Run curl for one request and normalise the outcome into a result dict.

    The returned dict carries ok/body/status_code/stderr/returncode/elapsed_s,
    the shell-quoted command, and an error label when the call failed.
    """
    argv, rendered = _build_curl_command(endpoint, api_key, payload, timeout, headers)
    started = time.time()
    completed = subprocess.run(argv, capture_output=True, text=True)
    duration = time.time() - started
    stdout = completed.stdout or ""
    stderr = completed.stderr or ""
    body = stdout
    status_code = None
    if STATUS_MARKER in stdout:
        # Split the trailing "<marker><status>" line off the response body.
        head, _, tail = stdout.rpartition(STATUS_MARKER)
        body = head.rstrip("\n")
        raw_status = tail.strip()
        if raw_status:
            try:
                status_code = int(raw_status)
            except ValueError:
                status_code = None
    ok = completed.returncode == 0 and (status_code is None or status_code < 400)
    error = None
    if not ok:
        if completed.returncode != 0:
            error = f"curl_exit_{completed.returncode}"
        elif status_code is not None and status_code >= 400:
            error = f"http_{status_code}"
        if stderr:
            error = f"{error}:{stderr.strip()}" if error else stderr.strip()
    return {
        "ok": ok,
        "body": body,
        "status_code": status_code,
        "stderr": stderr.strip(),
        "returncode": completed.returncode,
        "elapsed_s": duration,
        "command": rendered,
        "error": error,
    }
| def _format_curl_preview(endpoint: str, payload: Dict[str, Any], timeout: float, headers: List[str]) -> str: | |
| data = json.dumps(payload, ensure_ascii=False) | |
| parts = [ | |
| "curl", | |
| "-sS", | |
| "-L", | |
| "--max-time", | |
| str(max(timeout, 1e-3)), | |
| "-H", | |
| "Content-Type: application/json", | |
| "-H", | |
| "Authorization: Bearer ${CHUTES_API_KEY}", | |
| ] | |
| for header in headers: | |
| parts.extend(["-H", header]) | |
| parts.extend([ | |
| "-X", | |
| "POST", | |
| "--data-binary", | |
| data, | |
| endpoint, | |
| ]) | |
| return " ".join(shlex.quote(part) for part in parts) | |
def main(argv: List[str] | None = None) -> int:
    """CLI entry point for the curl-based Chutes JSON sanity probe.

    Parses flags, builds one chat-completions payload per scenario, then
    either prints a dry-run preview or executes every request via curl,
    validates each response, and prints a PASS/FAIL summary.

    Returns 0 when every scenario passes, 1 otherwise. Raises SystemExit when
    curl is missing or required environment variables are unset.
    """
    load_dotenv(find_dotenv(), override=False)
    if shutil.which("curl") is None:
        raise SystemExit("curl binary not found on PATH. Install curl to use this script.")
    # No CLI arguments at all means "do a live run".
    argv = argv or []
    if not argv:
        argv = ["--execute"]
    default_timeout = _env_float("SCILLM_SANITY_TIMEOUT_S", 30.0)
    parser = argparse.ArgumentParser(
        description="Chutes experimental JSON sanity via curl (no SciLLM dependency)"
    )
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--dry-run", action="store_true", help="List probe payloads without executing")
    mode.add_argument("--execute", action="store_true", help="Perform live requests (default)")
    parser.add_argument(
        "--request-timeout-s",
        type=float,
        default=default_timeout,
        help="curl --max-time value per request",
    )
    parser.add_argument(
        "--endpoint-path",
        default="/chat/completions",
        help="Relative path appended to CHUTES_API_BASE (default: /chat/completions)",
    )
    parser.add_argument("--model", dest="model_override", help="Override CHUTES_EXPERIMENTAL for this run")
    parser.add_argument("--verbose", action="store_true", help="Print per-scenario progress")
    parser.add_argument(
        "--verbose-json",
        action="store_true",
        help="Print the full JSON response body for each scenario",
    )
    parser.add_argument("--json-summary", action="store_true", help="Print machine-readable JSON summary")
    parser.add_argument("--details", action="store_true", help="Show per-scenario PASS/FAIL rows")
    parser.add_argument(
        "--json-sanitize",
        dest="json_sanitize",
        action="store_true",
        # SCILLM_JSON_SANITIZE env var can pre-enable repair without a flag.
        default=os.getenv("SCILLM_JSON_SANITIZE", "0").lower() in {"1", "true", "yes", "on"},
        help="Attempt to repair JSON responses on parse failure",
    )
    parser.add_argument("--no-json-sanitize", dest="json_sanitize", action="store_false")
    parser.add_argument(
        "--header",
        action="append",
        default=[],
        help="Additional HTTP header (key: value). May be repeated.",
    )
    parser.add_argument(
        "--print-curl",
        action="store_true",
        help="Show the curl command used for each scenario (Authorization header masked)",
    )
    args = parser.parse_args(argv)
    # Neither mode flag given: default to execute.
    if not args.dry_run and not args.execute:
        args.execute = True
    base = os.environ.get("CHUTES_API_BASE", "").strip()
    key = os.environ.get("CHUTES_API_KEY", "").strip()
    model_name = (args.model_override or os.environ.get("CHUTES_EXPERIMENTAL", "")).strip()
    if not base or not key or not model_name:
        raise SystemExit("Missing CHUTES_API_BASE, CHUTES_API_KEY, or CHUTES_EXPERIMENTAL environment variables.")
    # Normalise base/path joining so exactly one slash separates them.
    endpoint = f"{base.rstrip('/')}{args.endpoint_path if args.endpoint_path.startswith('/') else '/' + args.endpoint_path}"
    system_prompt = "You must respond with strictly valid JSON that satisfies the requested schema."
    scenario_defs = _scenario_definitions(system_prompt)
    # Build one request payload per scenario.
    requests: List[Dict[str, Any]] = []
    for entry in scenario_defs:
        req = {
            "model": model_name,
            "messages": entry["messages"],
            "response_format": entry["response_format"],
            "max_tokens": entry["max_tokens"],
            "temperature": entry["temperature"],
        }
        requests.append({"scenario": entry["scenario"], "payload": req})
    if args.dry_run and not args.execute:
        # Dry-run: print what would be sent and exit successfully.
        preview = {
            "mode": "dry-run",
            "count": len(requests),
            "model": model_name,
            "endpoint": endpoint,
            "scenarios": [item["scenario"] for item in requests],
        }
        if args.print_curl:
            preview["curl_examples"] = {
                item["scenario"]: _format_curl_preview(endpoint, item["payload"], args.request_timeout_s, args.header)
                for item in requests
            }
        print(json.dumps(preview, ensure_ascii=False, indent=2))
        return 0
    items: List[Dict[str, Any]] = []
    success = 0
    last_error = None
    start = time.time()
    for idx, entry in enumerate(requests):
        scenario = entry["scenario"]
        payload = entry["payload"]
        if args.print_curl or args.verbose:
            preview_cmd = _format_curl_preview(endpoint, payload, args.request_timeout_s, args.header)
            if args.print_curl:
                print(f"CURL {scenario}: {preview_cmd}")
        result = _invoke_curl(endpoint, key, payload, args.request_timeout_s, args.header)
        content_head = None
        parsed_payload = None
        reason = None
        ok = result["ok"]
        content_text = None
        meta_response = None
        if not ok:
            # Transport-level failure (curl exit code or HTTP >= 400).
            reason = result.get("error") or "curl_failed"
        else:
            content_text, extraction_error, meta_response = _extract_message_and_json(result["body"])
            if not content_text:
                ok = False
                reason = extraction_error or "missing_content"
            else:
                try:
                    parsed_payload = json.loads(content_text)
                except json.JSONDecodeError:
                    # Optionally try a fence-stripping/brace-extracting repair.
                    if args.json_sanitize:
                        repaired = _repair_json_string(content_text)
                        if repaired:
                            try:
                                parsed_payload = json.loads(repaired)
                                content_text = repaired
                            except json.JSONDecodeError:
                                parsed_payload = None
                    if parsed_payload is None:
                        ok = False
                        reason = "invalid_json"
        # Only validate scenario semantics when we got parseable JSON.
        if parsed_payload is not None and ok:
            ok, reason = _validate_payload(scenario, parsed_payload)
        if ok:
            success += 1
        else:
            last_error = reason or last_error
        # Short single-line snippet of content (or raw body) for summaries.
        content_head = (content_text or "")[:160].replace("\n", " ") if content_text else (result.get("body", "")[:160].replace("\n", " ") if result.get("body") else None)
        if args.verbose:
            status_label = "OK" if ok else "ERR"
            snippet = content_head or ""
            print(f"SCENARIO {scenario} -> {status_label} {snippet}")
        if args.verbose_json:
            status_label = "PASS" if ok else "FAIL"
            reason_label = "ok" if ok else (reason or "unknown")
            divider = "=" * 24
            print(
                f"\n{divider} {scenario} | {status_label} ({reason_label}) {divider}"
            )
            body_preview = result.get("body") or ""
            print(body_preview if body_preview else "<empty body>")
            print(divider * 2)
        items.append(
            {
                "index": idx,
                "scenario": scenario,
                "ok": ok,
                "reason": reason,
                "curl_status": result.get("status_code"),
                "curl_exit": result.get("returncode"),
                "elapsed_s": round(result.get("elapsed_s", 0.0), 3),
                "content_head": content_head,
            }
        )
    elapsed = round(time.time() - start, 3)
    failure = len(items) - success
    summary = {
        "ok": success == len(items) and (last_error is None),
        "count": len(items),
        "success_count": success,
        "failure_count": failure,
        "error": last_error,
        "model": model_name,
        "endpoint": endpoint,
        "items": items,
        "elapsed_s": elapsed,
    }
    verdict = "PASS" if summary["ok"] else "FAIL"
    # Aggregate failure reasons for the one-line RESULT summary.
    reason_counts: Dict[str, int] = {}
    for item in items:
        if item.get("ok"):
            continue
        label = item.get("reason") or "unknown"
        reason_counts[label] = reason_counts.get(label, 0) + 1
    if reason_counts:
        reason_bits = ", ".join(f"{label}×{count}" for label, count in sorted(reason_counts.items()))
    else:
        reason_bits = "all_ok"
    print(
        f"RESULT {verdict} {success}/{len(items)} model={model_name} elapsed_s={elapsed} reasons={reason_bits}"
    )
    if args.json_summary:
        print(json.dumps(summary, ensure_ascii=False))
    if args.details or args.json_summary:
        print(
            f"SUMMARY chutes_experimental_json_curl ok={1 if summary['ok'] else 0} "
            f"count={len(items)} success={success} failure={failure} elapsed_s={elapsed}"
        )
    if args.details:
        for item in items:
            status = "PASS" if item.get("ok") else "FAIL"
            reason = item.get("reason") or "ok"
            snippet = item.get("content_head") or ""
            if snippet:
                print(f"{status} {item['scenario']}: {reason} | {snippet}")
            else:
                print(f"{status} {item['scenario']}: {reason}")
    return 0 if summary["ok"] else 1
# Script entry point: exit code mirrors main()'s return value (0 = all pass).
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment