Skip to content

Instantly share code, notes, and snippets.

@jleechan2015
Created May 24, 2026 05:30
Show Gist options
  • Select an option

  • Save jleechan2015/3d33018ee328ace45c19481e1375734c to your computer and use it in GitHub Desktop.

Select an option

Save jleechan2015/3d33018ee328ace45c19481e1375734c to your computer and use it in GitHub Desktop.
PR7064 terminal E2E evidence iteration_010 (ZFC level-up atomicity)

Evidence Package: test_level_up_organic

Package Manifest

  • Test Name: test_level_up_organic
  • Run ID: test_level_up_organic-010-20260524T045750
  • Iteration: 10
  • Bundle Version: 1.2.0
  • Collected At (UTC): 2026-05-24T04:57:50.582284+00:00
  • Repository: worldarchitect.ai
  • Branch: pr6958-zfc-evidence-followups
  • Commit: df70b184d6a7c749ecdf1d7605903c44161f267e
  • Merge Base: e6ab5b1cbabcb98b7230ec21cc2699b98154595a
  • Commits Ahead of Main: 38

Git Provenance

.beads/issues.jsonl                                |    23 +
 .claude/skills/code-standards/SKILL.md             |    14 +-
 .claude/skills/repro-twin-clone-evidence/SKILL.md  |    14 +-
 docs/evidence/pr-7064/README.md                    |    79 +
 docs/evidence/pr-7064/README.md.sha256             |     1 +
 docs/evidence/pr-7064/artifacts/collection_log.txt |    35 +
 .../pr-7064/artifacts/collection_log.txt.sha256    |     1 +
 docs/evidence/pr-7064/artifacts/lsof_output.txt    |     3 +
 .../pr-7064/artifacts/lsof_output.txt.sha256       |     1 +
 docs/evidence/pr-7064/artifacts/ps_output.txt      |     2 +
 .../pr-7064/artifacts/ps_output.txt.sha256         |     1 +
 docs/evidence/pr-7064/artifacts/server.log         | 18439 ++++++++++++++++++
 docs/evidence/pr-7064/artifacts/server.log.sha256  |     1 +
 docs/evidence/pr-7064/browser/README.md            |   130 +
 docs/evidence/pr-7064/browser/README.md.sha256     |     1 +
 .../pr-7064/browser/artifacts/browser_trace.json   |   268 +
 .../browser/artifacts/browser_trace.json.sha256    |     1 +
 .../pr-7064/browser/artifacts/collection_log.txt   |    47 +
 .../browser/artifacts/collection_log.txt.sha256    |     1 +
 .../pr-7064/browser/artifacts/lsof_output.txt      |     3 +
 .../browser/artifacts/lsof_output.txt.sha256       |     1 +
 .../artifacts/pending_backend_snapshot.json        |   130 +
 .../artifacts/pending_backend_snapshot.json.sha256 |     1 +
 .../pending_level_up_projection_response.json      |  1365 ++
 ...ending_level_up_projection_response.json.sha256 |     1 +
 .../browser/artifacts/pending_snapshot.json        |   104 +
 .../browser/artifacts/pending_snapshot.json.sha256 |     1 +
 .../pr-7064/browser/artifacts/ps_output.txt        |     2 +
 .../pr-7064/browser/artifacts/ps_output.txt.sha256 |     1 +
 .../screenshots/character_creation_not_needed.png  |   Bin 0 -> 179253 bytes
 .../screenshots/character_creation_start.png       |   Bin 0 -> 179168 bytes
 .../browser/artifacts/screenshots/game_page.png    |   Bin 0 -> 176878 bytes
 ...p_modal_lockout_browser_after_followup_turn.png |   Bin 0 -> 142826 bytes
 ...al_lockout_browser_after_hallucination_turn.png |   Bin 0 -> 223505 bytes
 ...elup_modal_lockout_browser_gameplay_started.png |   Bin 0 -> 179447 bytes
 .../artifacts/screenshots/pending_level_up_ui.png  |   Bin 0 -> 218010 bytes
 .../browser/artifacts/seeded_current_state.json    |   236 +
 .../artifacts/seeded_current_state.json.sha256     |     1 +
 docs/evidence/pr-7064/browser/artifacts/server.log | 19327 +++++++++++++++++++
 .../pr-7064/browser/artifacts/server.log.sha256    |     1 +
 .../[email protected]      |    41 +
 .../[email protected]     |   Bin 0 -> 7627071 bytes
 ...city_browser_browser-test-1779594546_253478.vtt |    41 +
 .../browser/artifacts/videos/video_manifest.json   |     9 +
 .../browser/artifacts/videos/video_manifest.lock   |     0
 .../campaigns/Browser Test Campaign_rkUucc2M.txt   |    61 +
 .../Browser Test Campaign_rkUucc2M_game_state.json |   295 +
 .../Lockout Guard Browser Test_ENpUNby7.txt        |    75 +
 ...out Guard Browser Test_ENpUNby7_game_state.json |   318 +
 docs/evidence/pr-7064/browser/evidence.md          |    68 +
 docs/evidence/pr-7064/browser/evidence.md.sha256   |     1 +
 .../browser/gemini_http_request_responses.jsonl    |    16 +
 .../gemini_http_request_responses.jsonl.sha256     |     1 +
 .../pr-7064/browser/http_request_responses.jsonl   |   272 +
 .../browser/http_request_responses.jsonl.sha256    |     1 +
 .../pr-7064/browser/llm_request_responses.jsonl    |    12 +
 .../browser/llm_request_responses.jsonl.sha256     |     1 +
 docs/evidence/pr-7064/browser/metadata.json        |   107 +
 docs/evidence/pr-7064/browser/metadata.json.sha256 |     1 +
 docs/evidence/pr-7064/browser/methodology.md       |     8 +
 .../evidence/pr-7064/browser/methodology.md.sha256 |     1 +
 docs/evidence/pr-7064/browser/notes.md             |    31 +
 docs/evidence/pr-7064/browser/notes.md.sha256      |     1 +
 ...flash-preview_levelup_modal_lockout_browser.txt |   157 +
 ...review_levelup_modal_lockout_browser.txt.sha256 |     1 +
 ...3-flash-preview_pending_level_up_projection.txt |   126 +
 ...-preview_pending_level_up_projection.txt.sha256 |     1 +
 .../raw_gemini_http_request_responses.jsonl        |    16 +
 .../browser/raw_http_request_responses.jsonl       |   272 +
 .../browser/raw_llm_request_responses.jsonl        |    12 +
 .../pr-7064/browser/request_responses.jsonl        |   272 +
 .../pr-7064/browser/request_responses.jsonl.sha256 |     1 +
 docs/evidence/pr-7064/browser/run.json             |    93 +
 docs/evidence/pr-7064/browser/run.json.sha256      |     1 +
 .../pr-7064/browser/streaming_evidence.json        |    39 +
 .../pr-7064/browser/streaming_evidence.json.sha256 |     1 +
 .../Organic Level-Up Progression Test_avafxHQ5.txt |   349 +
 ...el-Up Progression Test_avafxHQ5_game_state.json |   331 +
 .../pr-7064/campaigns/avafxHQ50msKBdKQKmI5.json    |   380 +
 .../campaigns/avafxHQ50msKBdKQKmI5.json.sha256     |     1 +
 docs/evidence/pr-7064/evidence.md                  |    72 +
 docs/evidence/pr-7064/evidence.md.sha256           |     1 +
 .../pr-7064/gemini_http_request_responses.jsonl    |    34 +
 .../gemini_http_request_responses.jsonl.sha256     |     1 +
 ...mini_http_request_responses_1779593448252.jsonl |    34 +
 docs/evidence/pr-7064/http_request_responses.jsonl |   842 +
 .../pr-7064/http_request_responses.jsonl.sha256    |     1 +
 .../http_request_responses_1779593448252.jsonl     |   842 +
 docs/evidence/pr-7064/llm_request_responses.jsonl  |    34 +
 .../pr-7064/llm_request_responses.jsonl.sha256     |     1 +
 .../llm_request_responses_1779593448252.jsonl      |    34 +
 docs/evidence/pr-7064/metadata.json                |   107 +
 docs/evidence/pr-7064/metadata.json.sha256         |     1 +
 docs/evidence/pr-7064/methodology.md               |    38 +
 docs/evidence/pr-7064/methodology.md.sha256        |     1 +
 docs/evidence/pr-7064/notes.md                     |    33 +
 docs/evidence/pr-7064/notes.md.sha256              |     1 +
 ...ini-3-flash-preview_single_organic_level_up.txt |    38 +
 ...lash-preview_single_organic_level_up.txt.sha256 |     1 +
 .../raw_unknown_model_evidence_signature_guard.txt |     1 +
 ...known_model_evidence_signature_guard.txt.sha256 |     1 +
 ...n_model_finish_intent_prompt_and_classifier.txt |     1 +
 ..._finish_intent_prompt_and_classifier.txt.sha256 |     1 +
 .../pr-7064/replay_fixture_source_manifest.json    |    67 +
 docs/evidence/pr-7064/request_responses.jsonl      |    76 +
 .../pr-7064/request_responses.jsonl.sha256         |     1 +
 docs/evidence/pr-7064/run.json                     |   178 +
 docs/evidence/pr-7064/run.json.sha256              |     1 +
 .../pr-7064/scenario_results_checkpoint.json       |     1 +
 .../scenario_results_checkpoint.json.sha256        |     1 +
 docs/evidence/pr-7064/streaming_evidence.json      |    27 +
 .../pr-7064/streaming_evidence.json.sha256         |     1 +
 docs/evidence/pr-7064/test_console_output.txt      |    82 +
 .../pr-7064/test_console_output.txt.sha256         |     1 +
 docs/evidence/pr-7064/test_level_up_organic.cast   |   119 +
 .../pr-7064/test_level_up_organic.cast.sha256      |     1 +
 mvp_site/agents.py                                 |    37 +-
 mvp_site/backend_adjustment_registry.py            |   168 +-
 mvp_site/backend_adjustment_specs.py               |   989 +
 mvp_site/frontend_v1/app.js                        |    33 +-
 mvp_site/game_state.py                             |    39 +-
 mvp_site/llm_service.py                            |   110 +
 mvp_site/prompts/level_up_instruction.md           |    38 +
 mvp_site/prompts/planning_protocol.md              |    26 +-
 mvp_site/rewards_engine.py                         |    91 +
 mvp_site/schemas/prompt_tool_contracts.json        |     4 +-
 mvp_site/session_header_utils.py                   |    27 +-
 .../frontend/test_app_js_structured_fields.js      |     2 +-
 .../test_agent_routing_with_state_validation.py    |     2 +-
 mvp_site/tests/test_backend_adjustment_registry.py |    93 +-
 mvp_site/tests/test_llm_service_context.py         |    15 +-
 mvp_site/tests/test_rewards_engine.py              |    67 +
 mvp_site/tests/test_session_header_enrichment.py   |    80 +
 mvp_site/tests/test_world_logic.py                 |   541 +-
 mvp_site/world_logic.py                            |    51 +-
 roadmap/README.md                                  |     1 +
 ...teps-2026-05-24-pr6958-adjuster-registration.md |   126 +
 ...-05-24-pr7048-location-centralization-review.md |    72 +
 scripts/copy_campaign.py                           |     9 +
 testing_mcp/core/test_level_up_organic.py          |     4 +-
 testing_mcp/lib/base_test.py                       |    32 +-
 testing_mcp/lib/llm_response_cache/config.py       |    29 +-
 .../lib/llm_response_cache/prompt_fingerprint.py   |     5 +
 testing_mcp/lib/llm_response_cache/server_cache.py |   236 +
 .../lib/llm_response_cache/tests/test_config.py    |    27 +
 145 files changed, 49026 insertions(+), 254 deletions(-)

Server Runtime

  • Port: 8074
  • PID: 66812
  • Command: /opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info

Environment Variables

  • WORLDAI_DEV_MODE: true
  • TESTING: None
  • MOCK_SERVICES_MODE: false
  • GOOGLE_APPLICATION_CREDENTIALS: [SET - file:serviceAccountKey.json]
  • WORLDAI_GOOGLE_APPLICATION_CREDENTIALS: [SET - file:serviceAccountKey.json]
  • FIRESTORE_EMULATOR_HOST: None
  • PORT: 8074
  • FIREBASE_PROJECT_ID: worldarchitecture-ai
  • GEMINI_API_KEY: [SET - 39 chars]
  • LLM_REQUEST_RESPONSE_CAPTURE_PATH: /tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/llm_request_responses_1779598181114.jsonl
  • HTTP_REQUEST_RESPONSE_CAPTURE_PATH: /tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/http_request_responses_1779598181114.jsonl
  • GEMINI_HTTP_REQUEST_RESPONSE_CAPTURE_PATH: /tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/gemini_http_request_responses_1779598181114.jsonl
  • MCP_TEST_PROVIDER_HTTP_CAPTURE_PATH: /tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/provider_http_request_responses_1779598181114.jsonl

Files in This Bundle

  • README.md - This manifest
  • methodology.md - Testing methodology
  • evidence.md - Evidence summary with Claim→Artifact Map and Coverage Matrix
  • notes.md - Additional context, TODOs, follow-ups
  • metadata.json - Machine-readable metadata
  • assertions.json - Strict before/after parity assertions (if present)
  • run.json - Test results
  • streaming_evidence.json - Normalized streaming evidence summary
  • request_responses.jsonl - Raw MCP request/response payloads (if present)
  • llm_request_responses.jsonl - Raw LLM request/response payloads (if present)
  • http_request_responses.jsonl - Raw local-server HTTP request/response payloads (if present)
  • gemini_http_request_responses.jsonl - Raw Gemini transport HTTP traces (if present)
  • artifacts/ - Additional evidence files
{
"generated_at_utc": "2026-05-24T04:57:50.463649+00:00",
"test_name": "test_level_up_organic",
"work_name": "test_level_up_organic",
"server_base_url": "http://127.0.0.1:8074",
"using_external_server": false,
"user_id": "test-test_level_up_organic-1779598181",
"failure_messages": [
"single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL\n- Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No `level_up_*` choice lets the player edit prepared spells before finish.\n- Blocking: The first modal response\u2019s visible `Recommended package:` lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.\n- Passing: `level_up_now` opens the modal without committing level 2; final level commit appears only on `finish_level_up_return_to_game`.\n- Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.\n- Passing: After finish, final state has `level=2`, `level_up_pending=false`, `level_up_in_progress=false`, no active level-up choices in the final planning block, and resumes real story choices.\n- No legacy `level_up_signal.level_up` boolean found; observed `level_up_signal` entries use `current_level` + `target_level`."
],
"http_probes": {
"/health": {
"ok": true,
"status": 200,
"body_excerpt": "{\"mcp_client\":{\"initialized\":false},\"service\":\"worldarchitect-ai\",\"status\":\"healthy\",\"timestamp\":\"2026-05-24T04:57:50.464597+00:00\"}\n",
"is_json_api": true,
"content_type": "application/json"
},
"/mcp": {
"ok": true,
"status": 200,
"body_excerpt": "<!doctype html>\n<html lang=\"en\">\n\n<head>\n <meta charset=\"UTF-8\" />\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n <link rel=\"icon\" type=\"image/svg+xml\" href=\"/frontend_v1/dragon-favicon.svg\" />\n <title>WorldAI</title>\n <!-- DNS prefetch for external domains to reduce",
"is_json_api": false,
"content_type": "text/html; charset=utf-8"
},
"/settings": {
"ok": true,
"status": 200,
"body_excerpt": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <script src=\"/frontend_v1/js/theme-bootstrap.js\"></script>\n <link rel=\"icon\" type=\"image/svg+xml\" href=\"/frontend_v1/dragon-favicon.svg\">\n <title>Se",
"is_json_api": false,
"content_type": "text/html; charset=utf-8"
}
},
"mcp_probes": {
"get_user_settings": {
"ok": true,
"payload": {
"cerebras_model": "qwen-3-235b-a22b-instruct-2507",
"gemini_model": "gemini-3-flash-preview",
"has_custom_cerebras_key": false,
"has_custom_gemini_key": false,
"has_custom_openclaw_gateway_token": false,
"has_custom_openclaw_key": false,
"has_custom_openrouter_key": false,
"llm_provider": "gemini",
"openclaw_gateway_port": 18789,
"openclaw_gateway_url": "",
"openrouter_model": "meta-llama/llama-3.1-70b-instruct",
"success": true
}
}
},
"openclaw_endpoint_probes": [
{
"target": "http://127.0.0.1:18789/v1/models",
"probe": {
"ok": false,
"error": "<urlopen error [Errno 61] Connection refused>"
}
}
],
"openclaw_settings": {
"llm_provider": "gemini",
"openclaw_gateway_port": 18789,
"openclaw_gateway_url": ""
}
}

Evidence Summary: test_level_up_organic

Test Results

  • Total Scenarios: 3
  • Scenario Validation Passed: 2
  • Scenario Validation Failed: 1
  • Scenario Validation Pass Rate: 66.7%
  • Raw LLM Layer Passed: 1/2 (50.0%)

⚠️ Post-Processing Detected Additional Issues

  • Raw Layer Pass Rate: 1/2 (50.0%)
  • Post-Processing Pass Rate (raw-validated scenarios): 0/1 (0.0%)

Post-processing detected issues (dm_notes, core_memories, state mutations) that the raw narrative validation missed. See errors in individual scenario files.

  • Post-Processing Campaign Capture Passed: 1
  • Post-Processing Campaign Capture Failed: 0
  • Post-Processing Campaign Capture Pass Rate: 100.0%

Scenario Results

finish_intent_prompt_and_classifier

  • Status: ✅ PASS

single_organic_level_up

  • Status: ❌ FAIL
  • Campaign ID: A0b6WSmVa22xyVcs48VL
  • Errors: ['single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL\n- Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No level_up_* choice lets the player edit prepared spells before finish.\n- Blocking: The first modal response’s visible Recommended package: lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.\n- Passing: level_up_now opens the modal without committing level 2; final level commit appears only on finish_level_up_return_to_game.\n- Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.\n- Passing: After finish, final state has level=2, level_up_pending=false, level_up_in_progress=false, no active level-up choices in the final planning block, and resumes real story choices.\n- No legacy level_up_signal.level_up boolean found; observed level_up_signal entries use current_level + target_level.']

EVIDENCE_SIGNATURE_GUARD

  • Status: ✅ PASS

Provenance Chain

  • Git HEAD: df70b184d6a7c749ecdf1d7605903c44161f267e
  • Test Timestamp: 2026-05-24T04:57:50.582284+00:00
  • Server PID: 66812

Claim → Artifact Map

Claim File Key Field(s)
Scenario validation passed: 2/3 run.json scenarios[].passed, scenarios[].errors
Campaign post-processing capture passed: 1/1 run.json campaign_capture_status[*].status
Streaming evidence normalized streaming_evidence.json summary., scenarios[].chunk_count_observed
Bundle artifact inventory artifacts/collection_log.txt core_files, jsonl_captures, campaigns_dir
MCP request/response captured request_responses.jsonl Full request/response pairs
Local server HTTP request/response captured http_request_responses.jsonl http_request/http_response entries
LLM request/response stream captured llm_request_responses.jsonl request/response entries (type field)
Gemini HTTP transport captured gemini_http_request_responses.jsonl http_request/http_response/transport_error entries
Server execution log artifacts/server.log Raw server output
Git provenance metadata.json git_provenance.git_head = df70b184...

Coverage Matrix

Scenario Status Campaign ID
finish_intent_prompt_and_classifier ✅ Pass None
single_organic_level_up ❌ Fail A0b6WSmV...
EVIDENCE_SIGNATURE_GUARD ✅ Pass N/A

Evidence Integrity

  • All files in this bundle have corresponding .sha256 checksum files

  • Checksums use local basename paths so per-file verification works from each artifact directory

  • ⚠️ Server warnings detected (see artifacts/server.log)

  • Warning: ACTION_RESOLUTION_MISSING_FIELDS

What This Evidence Proves vs. Does NOT Prove

Proves:

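  • Scenario validation outcomes for test_level_up_organic, as exercised by the 3 scenarios in this run
  • Scenario execution pass rate (2/3); single_organic_level_up failed, so this bundle does not demonstrate a fully passing level-up flow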

Does NOT Prove:

  • Production server behavior (tested on local server unless otherwise noted)
  • Performance under load (single-request tests)
  • Edge cases not covered by scenarios
{
"test_name": "test_level_up_organic",
"run_id": "test_level_up_organic-010-20260524T045750",
"iteration": 10,
"bundle_version": "1.2.0",
"timestamp": "2026-05-24T04:57:50.582284+00:00",
"bundle_timestamp": "2026-05-24T04:57:50.582284+00:00",
"evidence_mode": "lightweight_prompt_tracking",
"evidence_mode_notes": "System instruction captured as filenames + char_count (not full text). Raw MCP request/response payloads captured in request_responses.jsonl; raw LLM request/response payloads captured in llm_request_responses.jsonl. Server logs in artifacts/. Bundle file inventory in artifacts/collection_log.txt.",
"git_provenance": {
"git_head": "df70b184d6a7c749ecdf1d7605903c44161f267e",
"git_branch": "pr6958-zfc-evidence-followups",
"merge_base": "e6ab5b1cbabcb98b7230ec21cc2699b98154595a",
"commits_ahead_of_main": 38,
"diff_stat_vs_main": ".beads/issues.jsonl | 23 +\n .claude/skills/code-standards/SKILL.md | 14 +-\n .claude/skills/repro-twin-clone-evidence/SKILL.md | 14 +-\n docs/evidence/pr-7064/README.md | 79 +\n docs/evidence/pr-7064/README.md.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/collection_log.txt | 35 +\n .../pr-7064/artifacts/collection_log.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/lsof_output.txt | 3 +\n .../pr-7064/artifacts/lsof_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/ps_output.txt | 2 +\n .../pr-7064/artifacts/ps_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/server.log | 18439 ++++++++++++++++++\n docs/evidence/pr-7064/artifacts/server.log.sha256 | 1 +\n docs/evidence/pr-7064/browser/README.md | 130 +\n docs/evidence/pr-7064/browser/README.md.sha256 | 1 +\n .../pr-7064/browser/artifacts/browser_trace.json | 268 +\n .../browser/artifacts/browser_trace.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/collection_log.txt | 47 +\n .../browser/artifacts/collection_log.txt.sha256 | 1 +\n .../pr-7064/browser/artifacts/lsof_output.txt | 3 +\n .../browser/artifacts/lsof_output.txt.sha256 | 1 +\n .../artifacts/pending_backend_snapshot.json | 130 +\n .../artifacts/pending_backend_snapshot.json.sha256 | 1 +\n .../pending_level_up_projection_response.json | 1365 ++\n ...ending_level_up_projection_response.json.sha256 | 1 +\n .../browser/artifacts/pending_snapshot.json | 104 +\n .../browser/artifacts/pending_snapshot.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/ps_output.txt | 2 +\n .../pr-7064/browser/artifacts/ps_output.txt.sha256 | 1 +\n .../screenshots/character_creation_not_needed.png | Bin 0 -> 179253 bytes\n .../screenshots/character_creation_start.png | Bin 0 -> 179168 bytes\n .../browser/artifacts/screenshots/game_page.png | Bin 0 -> 176878 bytes\n ...p_modal_lockout_browser_after_followup_turn.png | Bin 0 -> 142826 bytes\n ...al_lockout_browser_after_hallucination_turn.png | Bin 0 -> 223505 bytes\n 
...elup_modal_lockout_browser_gameplay_started.png | Bin 0 -> 179447 bytes\n .../artifacts/screenshots/pending_level_up_ui.png | Bin 0 -> 218010 bytes\n .../browser/artifacts/seeded_current_state.json | 236 +\n .../artifacts/seeded_current_state.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/artifacts/server.log | 19327 +++++++++++++++++++\n .../pr-7064/browser/artifacts/server.log.sha256 | 1 +\n .../[email protected] | 41 +\n .../[email protected] | Bin 0 -> 7627071 bytes\n ...city_browser_browser-test-1779594546_253478.vtt | 41 +\n .../browser/artifacts/videos/video_manifest.json | 9 +\n .../browser/artifacts/videos/video_manifest.lock | 0\n .../campaigns/Browser Test Campaign_rkUucc2M.txt | 61 +\n .../Browser Test Campaign_rkUucc2M_game_state.json | 295 +\n .../Lockout Guard Browser Test_ENpUNby7.txt | 75 +\n ...out Guard Browser Test_ENpUNby7_game_state.json | 318 +\n docs/evidence/pr-7064/browser/evidence.md | 68 +\n docs/evidence/pr-7064/browser/evidence.md.sha256 | 1 +\n .../browser/gemini_http_request_responses.jsonl | 16 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/http_request_responses.jsonl | 272 +\n .../browser/http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/llm_request_responses.jsonl | 12 +\n .../browser/llm_request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/metadata.json | 107 +\n docs/evidence/pr-7064/browser/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/methodology.md | 8 +\n .../evidence/pr-7064/browser/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/browser/notes.md | 31 +\n docs/evidence/pr-7064/browser/notes.md.sha256 | 1 +\n ...flash-preview_levelup_modal_lockout_browser.txt | 157 +\n ...review_levelup_modal_lockout_browser.txt.sha256 | 1 +\n ...3-flash-preview_pending_level_up_projection.txt | 126 +\n ...-preview_pending_level_up_projection.txt.sha256 | 1 +\n .../raw_gemini_http_request_responses.jsonl | 16 +\n .../browser/raw_http_request_responses.jsonl 
| 272 +\n .../browser/raw_llm_request_responses.jsonl | 12 +\n .../pr-7064/browser/request_responses.jsonl | 272 +\n .../pr-7064/browser/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/run.json | 93 +\n docs/evidence/pr-7064/browser/run.json.sha256 | 1 +\n .../pr-7064/browser/streaming_evidence.json | 39 +\n .../pr-7064/browser/streaming_evidence.json.sha256 | 1 +\n .../Organic Level-Up Progression Test_avafxHQ5.txt | 349 +\n ...el-Up Progression Test_avafxHQ5_game_state.json | 331 +\n .../pr-7064/campaigns/avafxHQ50msKBdKQKmI5.json | 380 +\n .../campaigns/avafxHQ50msKBdKQKmI5.json.sha256 | 1 +\n docs/evidence/pr-7064/evidence.md | 72 +\n docs/evidence/pr-7064/evidence.md.sha256 | 1 +\n .../pr-7064/gemini_http_request_responses.jsonl | 34 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n ...mini_http_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/http_request_responses.jsonl | 842 +\n .../pr-7064/http_request_responses.jsonl.sha256 | 1 +\n .../http_request_responses_1779593448252.jsonl | 842 +\n docs/evidence/pr-7064/llm_request_responses.jsonl | 34 +\n .../pr-7064/llm_request_responses.jsonl.sha256 | 1 +\n .../llm_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/metadata.json | 107 +\n docs/evidence/pr-7064/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/methodology.md | 38 +\n docs/evidence/pr-7064/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/notes.md | 33 +\n docs/evidence/pr-7064/notes.md.sha256 | 1 +\n ...ini-3-flash-preview_single_organic_level_up.txt | 38 +\n ...lash-preview_single_organic_level_up.txt.sha256 | 1 +\n .../raw_unknown_model_evidence_signature_guard.txt | 1 +\n ...known_model_evidence_signature_guard.txt.sha256 | 1 +\n ...n_model_finish_intent_prompt_and_classifier.txt | 1 +\n ..._finish_intent_prompt_and_classifier.txt.sha256 | 1 +\n .../pr-7064/replay_fixture_source_manifest.json | 67 +\n docs/evidence/pr-7064/request_responses.jsonl | 76 +\n 
.../pr-7064/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/run.json | 178 +\n docs/evidence/pr-7064/run.json.sha256 | 1 +\n .../pr-7064/scenario_results_checkpoint.json | 1 +\n .../scenario_results_checkpoint.json.sha256 | 1 +\n docs/evidence/pr-7064/streaming_evidence.json | 27 +\n .../pr-7064/streaming_evidence.json.sha256 | 1 +\n docs/evidence/pr-7064/test_console_output.txt | 82 +\n .../pr-7064/test_console_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/test_level_up_organic.cast | 119 +\n .../pr-7064/test_level_up_organic.cast.sha256 | 1 +\n mvp_site/agents.py | 37 +-\n mvp_site/backend_adjustment_registry.py | 168 +-\n mvp_site/backend_adjustment_specs.py | 989 +\n mvp_site/frontend_v1/app.js | 33 +-\n mvp_site/game_state.py | 39 +-\n mvp_site/llm_service.py | 110 +\n mvp_site/prompts/level_up_instruction.md | 38 +\n mvp_site/prompts/planning_protocol.md | 26 +-\n mvp_site/rewards_engine.py | 91 +\n mvp_site/schemas/prompt_tool_contracts.json | 4 +-\n mvp_site/session_header_utils.py | 27 +-\n .../frontend/test_app_js_structured_fields.js | 2 +-\n .../test_agent_routing_with_state_validation.py | 2 +-\n mvp_site/tests/test_backend_adjustment_registry.py | 93 +-\n mvp_site/tests/test_llm_service_context.py | 15 +-\n mvp_site/tests/test_rewards_engine.py | 67 +\n mvp_site/tests/test_session_header_enrichment.py | 80 +\n mvp_site/tests/test_world_logic.py | 541 +-\n mvp_site/world_logic.py | 51 +-\n roadmap/README.md | 1 +\n ...teps-2026-05-24-pr6958-adjuster-registration.md | 126 +\n ...-05-24-pr7048-location-centralization-review.md | 72 +\n scripts/copy_campaign.py | 9 +\n testing_mcp/core/test_level_up_organic.py | 4 +-\n testing_mcp/lib/base_test.py | 32 +-\n testing_mcp/lib/llm_response_cache/config.py | 29 +-\n .../lib/llm_response_cache/prompt_fingerprint.py | 5 +\n testing_mcp/lib/llm_response_cache/server_cache.py | 236 +\n .../lib/llm_response_cache/tests/test_config.py | 27 +\n 145 files changed, 49026 insertions(+), 254 deletions(-)",
"working_tree_dirty": false,
"working_tree_staged_changes": 0,
"working_tree_unstaged_changes": 0,
"working_tree_changed_files": [],
"working_tree_diff_sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"server": {
"base_url": "http://127.0.0.1:8074",
"hostname": "127.0.0.1",
"mode": "local",
"port": "8074",
"pid": 66812,
"process_cmdline": "/opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info",
"env_vars": {
"WORLDAI_DEV_MODE": "true",
"TESTING": null,
"MOCK_SERVICES_MODE": "false",
"GOOGLE_APPLICATION_CREDENTIALS": "[SET - file:serviceAccountKey.json]",
"WORLDAI_GOOGLE_APPLICATION_CREDENTIALS": "[SET - file:serviceAccountKey.json]",
"FIRESTORE_EMULATOR_HOST": null,
"PORT": "8074",
"FIREBASE_PROJECT_ID": "worldarchitecture-ai",
"GEMINI_API_KEY": "[SET - 39 chars]",
"LLM_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/llm_request_responses_1779598181114.jsonl",
"HTTP_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/http_request_responses_1779598181114.jsonl",
"GEMINI_HTTP_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/gemini_http_request_responses_1779598181114.jsonl",
"MCP_TEST_PROVIDER_HTTP_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/provider_http_request_responses_1779598181114.jsonl"
},
"lsof_output": "COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME\nPython 66812 jleechan 5u IPv4 0x85616efcba529c09 0t0 TCP *:8074 (LISTEN)\nPython 66831 jleechan 5u IPv4 0x85616efcba529c09 0t0 TCP *:8074 (LISTEN)",
"ps_output": "PID USER ELAPSED ARGS\n66812 jleechan 07:56 /opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info"
},
"provenance": {
"git_fetch_origin_main": {
"returncode": 0,
"stdout": null,
"stderr": "From https://github.com/jleechanorg/worldarchitect.ai\n * branch main -> FETCH_HEAD\nAuto packing the repository in background for optimum performance.\nSee \"git help gc\" for manual housekeeping.\nwarning: The last gc run reported the following. Please correct the root cause\nand remove /Users/jleechan/projects/worldarchitect.ai/.git/worktrees/worktree_autolvl/gc.log\nAutomatic cleanup will not be performed until the file is removed.\n\nwarning: There are too many unreachable loose objects; run 'git prune' to remove them."
},
"git_head": "df70b184d6a7c749ecdf1d7605903c44161f267e",
"git_branch": "pr6958-zfc-evidence-followups",
"merge_base": "e6ab5b1cbabcb98b7230ec21cc2699b98154595a",
"commits_ahead_of_main": 38,
"diff_stat_vs_main": ".beads/issues.jsonl | 23 +\n .claude/skills/code-standards/SKILL.md | 14 +-\n .claude/skills/repro-twin-clone-evidence/SKILL.md | 14 +-\n docs/evidence/pr-7064/README.md | 79 +\n docs/evidence/pr-7064/README.md.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/collection_log.txt | 35 +\n .../pr-7064/artifacts/collection_log.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/lsof_output.txt | 3 +\n .../pr-7064/artifacts/lsof_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/ps_output.txt | 2 +\n .../pr-7064/artifacts/ps_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/server.log | 18439 ++++++++++++++++++\n docs/evidence/pr-7064/artifacts/server.log.sha256 | 1 +\n docs/evidence/pr-7064/browser/README.md | 130 +\n docs/evidence/pr-7064/browser/README.md.sha256 | 1 +\n .../pr-7064/browser/artifacts/browser_trace.json | 268 +\n .../browser/artifacts/browser_trace.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/collection_log.txt | 47 +\n .../browser/artifacts/collection_log.txt.sha256 | 1 +\n .../pr-7064/browser/artifacts/lsof_output.txt | 3 +\n .../browser/artifacts/lsof_output.txt.sha256 | 1 +\n .../artifacts/pending_backend_snapshot.json | 130 +\n .../artifacts/pending_backend_snapshot.json.sha256 | 1 +\n .../pending_level_up_projection_response.json | 1365 ++\n ...ending_level_up_projection_response.json.sha256 | 1 +\n .../browser/artifacts/pending_snapshot.json | 104 +\n .../browser/artifacts/pending_snapshot.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/ps_output.txt | 2 +\n .../pr-7064/browser/artifacts/ps_output.txt.sha256 | 1 +\n .../screenshots/character_creation_not_needed.png | Bin 0 -> 179253 bytes\n .../screenshots/character_creation_start.png | Bin 0 -> 179168 bytes\n .../browser/artifacts/screenshots/game_page.png | Bin 0 -> 176878 bytes\n ...p_modal_lockout_browser_after_followup_turn.png | Bin 0 -> 142826 bytes\n ...al_lockout_browser_after_hallucination_turn.png | Bin 0 -> 223505 bytes\n 
...elup_modal_lockout_browser_gameplay_started.png | Bin 0 -> 179447 bytes\n .../artifacts/screenshots/pending_level_up_ui.png | Bin 0 -> 218010 bytes\n .../browser/artifacts/seeded_current_state.json | 236 +\n .../artifacts/seeded_current_state.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/artifacts/server.log | 19327 +++++++++++++++++++\n .../pr-7064/browser/artifacts/server.log.sha256 | 1 +\n .../[email protected] | 41 +\n .../[email protected] | Bin 0 -> 7627071 bytes\n ...city_browser_browser-test-1779594546_253478.vtt | 41 +\n .../browser/artifacts/videos/video_manifest.json | 9 +\n .../browser/artifacts/videos/video_manifest.lock | 0\n .../campaigns/Browser Test Campaign_rkUucc2M.txt | 61 +\n .../Browser Test Campaign_rkUucc2M_game_state.json | 295 +\n .../Lockout Guard Browser Test_ENpUNby7.txt | 75 +\n ...out Guard Browser Test_ENpUNby7_game_state.json | 318 +\n docs/evidence/pr-7064/browser/evidence.md | 68 +\n docs/evidence/pr-7064/browser/evidence.md.sha256 | 1 +\n .../browser/gemini_http_request_responses.jsonl | 16 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/http_request_responses.jsonl | 272 +\n .../browser/http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/llm_request_responses.jsonl | 12 +\n .../browser/llm_request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/metadata.json | 107 +\n docs/evidence/pr-7064/browser/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/methodology.md | 8 +\n .../evidence/pr-7064/browser/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/browser/notes.md | 31 +\n docs/evidence/pr-7064/browser/notes.md.sha256 | 1 +\n ...flash-preview_levelup_modal_lockout_browser.txt | 157 +\n ...review_levelup_modal_lockout_browser.txt.sha256 | 1 +\n ...3-flash-preview_pending_level_up_projection.txt | 126 +\n ...-preview_pending_level_up_projection.txt.sha256 | 1 +\n .../raw_gemini_http_request_responses.jsonl | 16 +\n .../browser/raw_http_request_responses.jsonl 
| 272 +\n .../browser/raw_llm_request_responses.jsonl | 12 +\n .../pr-7064/browser/request_responses.jsonl | 272 +\n .../pr-7064/browser/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/run.json | 93 +\n docs/evidence/pr-7064/browser/run.json.sha256 | 1 +\n .../pr-7064/browser/streaming_evidence.json | 39 +\n .../pr-7064/browser/streaming_evidence.json.sha256 | 1 +\n .../Organic Level-Up Progression Test_avafxHQ5.txt | 349 +\n ...el-Up Progression Test_avafxHQ5_game_state.json | 331 +\n .../pr-7064/campaigns/avafxHQ50msKBdKQKmI5.json | 380 +\n .../campaigns/avafxHQ50msKBdKQKmI5.json.sha256 | 1 +\n docs/evidence/pr-7064/evidence.md | 72 +\n docs/evidence/pr-7064/evidence.md.sha256 | 1 +\n .../pr-7064/gemini_http_request_responses.jsonl | 34 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n ...mini_http_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/http_request_responses.jsonl | 842 +\n .../pr-7064/http_request_responses.jsonl.sha256 | 1 +\n .../http_request_responses_1779593448252.jsonl | 842 +\n docs/evidence/pr-7064/llm_request_responses.jsonl | 34 +\n .../pr-7064/llm_request_responses.jsonl.sha256 | 1 +\n .../llm_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/metadata.json | 107 +\n docs/evidence/pr-7064/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/methodology.md | 38 +\n docs/evidence/pr-7064/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/notes.md | 33 +\n docs/evidence/pr-7064/notes.md.sha256 | 1 +\n ...ini-3-flash-preview_single_organic_level_up.txt | 38 +\n ...lash-preview_single_organic_level_up.txt.sha256 | 1 +\n .../raw_unknown_model_evidence_signature_guard.txt | 1 +\n ...known_model_evidence_signature_guard.txt.sha256 | 1 +\n ...n_model_finish_intent_prompt_and_classifier.txt | 1 +\n ..._finish_intent_prompt_and_classifier.txt.sha256 | 1 +\n .../pr-7064/replay_fixture_source_manifest.json | 67 +\n docs/evidence/pr-7064/request_responses.jsonl | 76 +\n 
.../pr-7064/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/run.json | 178 +\n docs/evidence/pr-7064/run.json.sha256 | 1 +\n .../pr-7064/scenario_results_checkpoint.json | 1 +\n .../scenario_results_checkpoint.json.sha256 | 1 +\n docs/evidence/pr-7064/streaming_evidence.json | 27 +\n .../pr-7064/streaming_evidence.json.sha256 | 1 +\n docs/evidence/pr-7064/test_console_output.txt | 82 +\n .../pr-7064/test_console_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/test_level_up_organic.cast | 119 +\n .../pr-7064/test_level_up_organic.cast.sha256 | 1 +\n mvp_site/agents.py | 37 +-\n mvp_site/backend_adjustment_registry.py | 168 +-\n mvp_site/backend_adjustment_specs.py | 989 +\n mvp_site/frontend_v1/app.js | 33 +-\n mvp_site/game_state.py | 39 +-\n mvp_site/llm_service.py | 110 +\n mvp_site/prompts/level_up_instruction.md | 38 +\n mvp_site/prompts/planning_protocol.md | 26 +-\n mvp_site/rewards_engine.py | 91 +\n mvp_site/schemas/prompt_tool_contracts.json | 4 +-\n mvp_site/session_header_utils.py | 27 +-\n .../frontend/test_app_js_structured_fields.js | 2 +-\n .../test_agent_routing_with_state_validation.py | 2 +-\n mvp_site/tests/test_backend_adjustment_registry.py | 93 +-\n mvp_site/tests/test_llm_service_context.py | 15 +-\n mvp_site/tests/test_rewards_engine.py | 67 +\n mvp_site/tests/test_session_header_enrichment.py | 80 +\n mvp_site/tests/test_world_logic.py | 541 +-\n mvp_site/world_logic.py | 51 +-\n roadmap/README.md | 1 +\n ...teps-2026-05-24-pr6958-adjuster-registration.md | 126 +\n ...-05-24-pr7048-location-centralization-review.md | 72 +\n scripts/copy_campaign.py | 9 +\n testing_mcp/core/test_level_up_organic.py | 4 +-\n testing_mcp/lib/base_test.py | 32 +-\n testing_mcp/lib/llm_response_cache/config.py | 29 +-\n .../lib/llm_response_cache/prompt_fingerprint.py | 5 +\n testing_mcp/lib/llm_response_cache/server_cache.py | 236 +\n .../lib/llm_response_cache/tests/test_config.py | 27 +\n 145 files changed, 49026 insertions(+), 254 deletions(-)",
"working_tree_staged_changes": 0,
"working_tree_unstaged_changes": 0,
"working_tree_untracked_files": 0,
"working_tree_changed_files": [],
"working_tree_diff_sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"working_tree_dirty": false,
"server": {
"base_url": "http://127.0.0.1:8074",
"hostname": "127.0.0.1",
"mode": "local",
"port": "8074",
"pid": 66812,
"process_cmdline": "/opt/homebrew/Cellar/[email protected]/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info",
"env_vars": {
"WORLDAI_DEV_MODE": "true",
"TESTING": null,
"MOCK_SERVICES_MODE": "false",
"GOOGLE_APPLICATION_CREDENTIALS": "[SET - file:serviceAccountKey.json]",
"WORLDAI_GOOGLE_APPLICATION_CREDENTIALS": "[SET - file:serviceAccountKey.json]",
"FIRESTORE_EMULATOR_HOST": null,
"PORT": "8074",
"FIREBASE_PROJECT_ID": "worldarchitecture-ai",
"GEMINI_API_KEY": "[SET - 39 chars]",
"LLM_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/llm_request_responses_1779598181114.jsonl",
"HTTP_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/http_request_responses_1779598181114.jsonl",
"GEMINI_HTTP_REQUEST_RESPONSE_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/gemini_http_request_responses_1779598181114.jsonl",
"MCP_TEST_PROVIDER_HTTP_CAPTURE_PATH": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/provider_http_request_responses_1779598181114.jsonl"
},
"lsof_output": "COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME\nPython 66812 jleechan 5u IPv4 0x85616efcba529c09 0t0 TCP *:8074 (LISTEN)\nPython 66831 jleechan 5u IPv4 0x85616efcba529c09 0t0 TCP *:8074 (LISTEN)",
"ps_output": "PID USER ELAPSED ARGS\n66812 jleechan 07:56 /opt/homebrew/Cellar/[email protected]/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info"
},
"timestamp": "2026-05-24T04:57:49.996007+00:00",
"test_file": "/Users/jleechan/projects/worktree_autolvl/testing_mcp/core/test_level_up_organic.py"
},
"summary": {
"total_scenarios": 3,
"passed": 2,
"failed": 1,
"campaign_capture_total": 1,
"campaign_capture_passed": 1,
"campaign_capture_failed": 0,
"raw_passed": 1,
"raw_total": 2,
"raw_pass_rate": "50.0%"
}
}

Methodology: test_level_up_organic

Test Type

Real API test against MCP server (not mock mode).

Test Mode

  • TESTING env var: not set
  • MOCK_SERVICES_MODE env var: false
  • Mode: Real API calls via MCP HTTP JSON-RPC

Execution Environment

  • Server running at port 8074
  • Process: /opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/Resources/Python.app/Contents/MacOS/Python -m gunicorn mvp_site.main:app --bind 0.0.0.0:8074 --workers 1 --worker-class gthread --threads 4 --timeout 600 --max-requests 1000 --access-logfile - --error-logfile - --log-level info

Evidence Capture

  • Git provenance captured at test start
  • Raw request/response payloads captured for each MCP call
  • Server runtime info captured via lsof/ps
  • Streaming evidence normalized into streaming_evidence.json
  • Raw local-server HTTP request/response payloads captured in http_request_responses.jsonl
  • Raw LLM request/response payloads captured in llm_request_responses.jsonl
  • Raw Gemini HTTP transport payloads captured in gemini_http_request_responses.jsonl
  • Raw LLM response text captured in server.log (artifacts/server.log)

Evidence Mode

  • System instruction capture: filenames + char_count (lightweight). Raw LLM request/response payloads captured in request_responses.jsonl when raw payload capture is enabled.

Validation Criteria

Test scenarios validate that:

  1. MCP server processes actions correctly
  2. State updates are returned as expected
  3. Server processes all requests successfully (validation warnings may be logged but requests succeed)

Note: Server warnings (e.g., validation, entity tracking) may appear in logs. Check artifacts/server.log for full server output.

Warning parser for notes: each log line that matches the regex `\bWARNING\b|SYSTEM WARNING:` is counted exactly once, even if it contains several matches.

Notes: test_level_up_organic

Run Information

  • Run ID: test_level_up_organic-010-20260524T045750
  • Iteration: 10
  • Bundle Version: 1.2.0
  • Timestamp: 2026-05-24T04:57:50.582284+00:00

Evidence Integrity

  • All files in this bundle have corresponding .sha256 checksum files
  • Checksums use local basename paths so per-file verification works from each artifact directory

Scenario Summary

  • Total: 3
  • Passed: 2
  • Failed: 1

Post-Processing Capture Summary

  • Campaigns with capture status: 1
  • Capture Passed: 1
  • Capture Failed: 0

Warning/Error Summary

  • Server Warnings: 59 warnings in server.log
  • Warning Parser: line-level regex \bWARNING\b|SYSTEM WARNING: (one count per matching line)
  • Key Warning Categories:
    • ACTION_RESOLUTION_MISSING_FIELDS

Failed Scenarios

single_organic_level_up

  • single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL
  • Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No level_up_* choice lets the player edit prepared spells before finish.
  • Blocking: The first modal response’s visible Recommended package: lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.
  • Passing: level_up_now opens the modal without committing level 2; final level commit appears only on finish_level_up_return_to_game.
  • Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.
  • Passing: After finish, final state has level=2, level_up_pending=false, level_up_in_progress=false, no active level-up choices in the final planning block, and resumes real story choices.
  • No legacy level_up_signal.level_up boolean found; observed level_up_signal entries use current_level + target_level.

Follow-up Items

Additional Context

{
"schema_version": "1.0",
"generated_at_utc": "2026-05-24T04:57:54.453931+00:00",
"source_kind": "testing_mcp",
"test_name": "test_level_up_organic",
"evidence_dir": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010",
"git_provenance": {
"git_head": "df70b184d6a7c749ecdf1d7605903c44161f267e",
"git_branch": "pr6958-zfc-evidence-followups",
"merge_base": "e6ab5b1cbabcb98b7230ec21cc2699b98154595a",
"commits_ahead_of_main": 38,
"diff_stat_vs_main": ".beads/issues.jsonl | 23 +\n .claude/skills/code-standards/SKILL.md | 14 +-\n .claude/skills/repro-twin-clone-evidence/SKILL.md | 14 +-\n docs/evidence/pr-7064/README.md | 79 +\n docs/evidence/pr-7064/README.md.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/collection_log.txt | 35 +\n .../pr-7064/artifacts/collection_log.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/lsof_output.txt | 3 +\n .../pr-7064/artifacts/lsof_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/ps_output.txt | 2 +\n .../pr-7064/artifacts/ps_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/artifacts/server.log | 18439 ++++++++++++++++++\n docs/evidence/pr-7064/artifacts/server.log.sha256 | 1 +\n docs/evidence/pr-7064/browser/README.md | 130 +\n docs/evidence/pr-7064/browser/README.md.sha256 | 1 +\n .../pr-7064/browser/artifacts/browser_trace.json | 268 +\n .../browser/artifacts/browser_trace.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/collection_log.txt | 47 +\n .../browser/artifacts/collection_log.txt.sha256 | 1 +\n .../pr-7064/browser/artifacts/lsof_output.txt | 3 +\n .../browser/artifacts/lsof_output.txt.sha256 | 1 +\n .../artifacts/pending_backend_snapshot.json | 130 +\n .../artifacts/pending_backend_snapshot.json.sha256 | 1 +\n .../pending_level_up_projection_response.json | 1365 ++\n ...ending_level_up_projection_response.json.sha256 | 1 +\n .../browser/artifacts/pending_snapshot.json | 104 +\n .../browser/artifacts/pending_snapshot.json.sha256 | 1 +\n .../pr-7064/browser/artifacts/ps_output.txt | 2 +\n .../pr-7064/browser/artifacts/ps_output.txt.sha256 | 1 +\n .../screenshots/character_creation_not_needed.png | Bin 0 -> 179253 bytes\n .../screenshots/character_creation_start.png | Bin 0 -> 179168 bytes\n .../browser/artifacts/screenshots/game_page.png | Bin 0 -> 176878 bytes\n ...p_modal_lockout_browser_after_followup_turn.png | Bin 0 -> 142826 bytes\n ...al_lockout_browser_after_hallucination_turn.png | Bin 0 -> 223505 bytes\n 
...elup_modal_lockout_browser_gameplay_started.png | Bin 0 -> 179447 bytes\n .../artifacts/screenshots/pending_level_up_ui.png | Bin 0 -> 218010 bytes\n .../browser/artifacts/seeded_current_state.json | 236 +\n .../artifacts/seeded_current_state.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/artifacts/server.log | 19327 +++++++++++++++++++\n .../pr-7064/browser/artifacts/server.log.sha256 | 1 +\n .../[email protected] | 41 +\n .../[email protected] | Bin 0 -> 7627071 bytes\n ...city_browser_browser-test-1779594546_253478.vtt | 41 +\n .../browser/artifacts/videos/video_manifest.json | 9 +\n .../browser/artifacts/videos/video_manifest.lock | 0\n .../campaigns/Browser Test Campaign_rkUucc2M.txt | 61 +\n .../Browser Test Campaign_rkUucc2M_game_state.json | 295 +\n .../Lockout Guard Browser Test_ENpUNby7.txt | 75 +\n ...out Guard Browser Test_ENpUNby7_game_state.json | 318 +\n docs/evidence/pr-7064/browser/evidence.md | 68 +\n docs/evidence/pr-7064/browser/evidence.md.sha256 | 1 +\n .../browser/gemini_http_request_responses.jsonl | 16 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/http_request_responses.jsonl | 272 +\n .../browser/http_request_responses.jsonl.sha256 | 1 +\n .../pr-7064/browser/llm_request_responses.jsonl | 12 +\n .../browser/llm_request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/metadata.json | 107 +\n docs/evidence/pr-7064/browser/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/browser/methodology.md | 8 +\n .../evidence/pr-7064/browser/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/browser/notes.md | 31 +\n docs/evidence/pr-7064/browser/notes.md.sha256 | 1 +\n ...flash-preview_levelup_modal_lockout_browser.txt | 157 +\n ...review_levelup_modal_lockout_browser.txt.sha256 | 1 +\n ...3-flash-preview_pending_level_up_projection.txt | 126 +\n ...-preview_pending_level_up_projection.txt.sha256 | 1 +\n .../raw_gemini_http_request_responses.jsonl | 16 +\n .../browser/raw_http_request_responses.jsonl 
| 272 +\n .../browser/raw_llm_request_responses.jsonl | 12 +\n .../pr-7064/browser/request_responses.jsonl | 272 +\n .../pr-7064/browser/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/browser/run.json | 93 +\n docs/evidence/pr-7064/browser/run.json.sha256 | 1 +\n .../pr-7064/browser/streaming_evidence.json | 39 +\n .../pr-7064/browser/streaming_evidence.json.sha256 | 1 +\n .../Organic Level-Up Progression Test_avafxHQ5.txt | 349 +\n ...el-Up Progression Test_avafxHQ5_game_state.json | 331 +\n .../pr-7064/campaigns/avafxHQ50msKBdKQKmI5.json | 380 +\n .../campaigns/avafxHQ50msKBdKQKmI5.json.sha256 | 1 +\n docs/evidence/pr-7064/evidence.md | 72 +\n docs/evidence/pr-7064/evidence.md.sha256 | 1 +\n .../pr-7064/gemini_http_request_responses.jsonl | 34 +\n .../gemini_http_request_responses.jsonl.sha256 | 1 +\n ...mini_http_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/http_request_responses.jsonl | 842 +\n .../pr-7064/http_request_responses.jsonl.sha256 | 1 +\n .../http_request_responses_1779593448252.jsonl | 842 +\n docs/evidence/pr-7064/llm_request_responses.jsonl | 34 +\n .../pr-7064/llm_request_responses.jsonl.sha256 | 1 +\n .../llm_request_responses_1779593448252.jsonl | 34 +\n docs/evidence/pr-7064/metadata.json | 107 +\n docs/evidence/pr-7064/metadata.json.sha256 | 1 +\n docs/evidence/pr-7064/methodology.md | 38 +\n docs/evidence/pr-7064/methodology.md.sha256 | 1 +\n docs/evidence/pr-7064/notes.md | 33 +\n docs/evidence/pr-7064/notes.md.sha256 | 1 +\n ...ini-3-flash-preview_single_organic_level_up.txt | 38 +\n ...lash-preview_single_organic_level_up.txt.sha256 | 1 +\n .../raw_unknown_model_evidence_signature_guard.txt | 1 +\n ...known_model_evidence_signature_guard.txt.sha256 | 1 +\n ...n_model_finish_intent_prompt_and_classifier.txt | 1 +\n ..._finish_intent_prompt_and_classifier.txt.sha256 | 1 +\n .../pr-7064/replay_fixture_source_manifest.json | 67 +\n docs/evidence/pr-7064/request_responses.jsonl | 76 +\n 
.../pr-7064/request_responses.jsonl.sha256 | 1 +\n docs/evidence/pr-7064/run.json | 178 +\n docs/evidence/pr-7064/run.json.sha256 | 1 +\n .../pr-7064/scenario_results_checkpoint.json | 1 +\n .../scenario_results_checkpoint.json.sha256 | 1 +\n docs/evidence/pr-7064/streaming_evidence.json | 27 +\n .../pr-7064/streaming_evidence.json.sha256 | 1 +\n docs/evidence/pr-7064/test_console_output.txt | 82 +\n .../pr-7064/test_console_output.txt.sha256 | 1 +\n docs/evidence/pr-7064/test_level_up_organic.cast | 119 +\n .../pr-7064/test_level_up_organic.cast.sha256 | 1 +\n mvp_site/agents.py | 37 +-\n mvp_site/backend_adjustment_registry.py | 168 +-\n mvp_site/backend_adjustment_specs.py | 989 +\n mvp_site/frontend_v1/app.js | 33 +-\n mvp_site/game_state.py | 39 +-\n mvp_site/llm_service.py | 110 +\n mvp_site/prompts/level_up_instruction.md | 38 +\n mvp_site/prompts/planning_protocol.md | 26 +-\n mvp_site/rewards_engine.py | 91 +\n mvp_site/schemas/prompt_tool_contracts.json | 4 +-\n mvp_site/session_header_utils.py | 27 +-\n .../frontend/test_app_js_structured_fields.js | 2 +-\n .../test_agent_routing_with_state_validation.py | 2 +-\n mvp_site/tests/test_backend_adjustment_registry.py | 93 +-\n mvp_site/tests/test_llm_service_context.py | 15 +-\n mvp_site/tests/test_rewards_engine.py | 67 +\n mvp_site/tests/test_session_header_enrichment.py | 80 +\n mvp_site/tests/test_world_logic.py | 541 +-\n mvp_site/world_logic.py | 51 +-\n roadmap/README.md | 1 +\n ...teps-2026-05-24-pr6958-adjuster-registration.md | 126 +\n ...-05-24-pr7048-location-centralization-review.md | 72 +\n scripts/copy_campaign.py | 9 +\n testing_mcp/core/test_level_up_organic.py | 4 +-\n testing_mcp/lib/base_test.py | 32 +-\n testing_mcp/lib/llm_response_cache/config.py | 29 +-\n .../lib/llm_response_cache/prompt_fingerprint.py | 5 +\n testing_mcp/lib/llm_response_cache/server_cache.py | 236 +\n .../lib/llm_response_cache/tests/test_config.py | 27 +\n 145 files changed, 49026 insertions(+), 254 deletions(-)",
"working_tree_dirty": false,
"working_tree_staged_changes": 0,
"working_tree_unstaged_changes": 0,
"working_tree_changed_files": [],
"working_tree_diff_sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"contract_versions": {
"master_directive_prompt": "1.0.2",
"game_state_instruction_prompt": "1.1.11",
"planning_protocol_prompt": "1.1.7",
"narrative_response_schema": "1.2.8",
"mcp_api_tool_contract": "1.0.5",
"character_template_prompt": "1.0.0"
},
"capture_files": {
"llm_request_responses": {
"path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/llm_request_responses.jsonl",
"relative_path": "llm_request_responses.jsonl",
"exists": true,
"entries": 32,
"sha256": "33912247d3925abba0e707f4bc1d2e43594f4a2cee9f90a28dc9b7e2e8ae1d69"
},
"http_request_responses": {
"path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/http_request_responses.jsonl",
"relative_path": "http_request_responses.jsonl",
"exists": true,
"entries": 750,
"sha256": "a6c4c9bbcdbff0ba2ec6fe0ece2c6f4fded20688c17b935388e8b4164975745b"
},
"gemini_http_request_responses": {
"path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/gemini_http_request_responses.jsonl",
"relative_path": "gemini_http_request_responses.jsonl",
"exists": true,
"entries": 32,
"sha256": "1fb1deb1300aa471036b0c557854393c5f9c3755ece19cc3c8f516d8e7ade256"
},
"provider_http_request_responses": {
"path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/provider_http_request_responses.jsonl",
"relative_path": "provider_http_request_responses.jsonl",
"exists": false,
"entries": 0,
"sha256": null
},
"mcp_request_responses": {
"path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/iteration_010/request_responses.jsonl",
"relative_path": "request_responses.jsonl",
"exists": true,
"entries": 66,
"sha256": "01b7d3c46236ec8836b94abee299706eab4a2b001b806ba142bc9c8166d0f4f3"
}
},
"extractor_hint": "Use a scenario-specific extractor to select replay stages from these raw traces."
}
{
"scenarios": [
{
"name": "finish_intent_prompt_and_classifier",
"passed": true,
"campaign_id": null,
"user_id": "deterministic",
"errors": [],
"user_email": "[email protected]"
},
{
"name": "single_organic_level_up",
"passed": false,
"errors": [
"single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL\n- Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No `level_up_*` choice lets the player edit prepared spells before finish.\n- Blocking: The first modal response\u2019s visible `Recommended package:` lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.\n- Passing: `level_up_now` opens the modal without committing level 2; final level commit appears only on `finish_level_up_return_to_game`.\n- Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.\n- Passing: After finish, final state has `level=2`, `level_up_pending=false`, `level_up_in_progress=false`, no active level-up choices in the final planning block, and resumes real story choices.\n- No legacy `level_up_signal.level_up` boolean found; observed `level_up_signal` entries use `current_level` + `target_level`."
],
"campaign_id": "A0b6WSmVa22xyVcs48VL",
"progression": [
{
"target_level": 2,
"start_level": 1,
"start_xp": 0,
"end_xp": 306,
"end_level": 2,
"triggered": true,
"completed": true,
"immediate_choice_ids": [
"level_up_now",
"proceed_to_pit_beta",
"inspect_remains"
],
"polled_choice_ids": [
"level_up_now",
"proceed_to_pit_beta",
"inspect_remains"
],
"completion_transcript": [
{
"step": "enter_level_up",
"action": "CHOICE:level_up_now",
"choice_ids": [
"finish_level_up_return_to_game",
"level_up_hp_fixed",
"level_up_hp_roll",
"level_up_fighting_style_defense",
"level_up_fighting_style_dueling",
"level_up_fighting_style_protection",
"level_up_fighting_style_great_weapon"
],
"response_text_preview": "Midday (12:05:00) at Pit Alpha\u2014The air shimmering around you begins to vibrate at a new frequency. As the remains of the sparring construct dissipate into motes of fading azure light, the ley-lines embedded in the marble floor pulse with a rhythmic, golden glow that matches the steady beating of your heart. You feel your connection to the divine source deepening, your physical frame hardening to withstand greater pressures, and your will manifesting as a tangible force of smiting power. The melo",
"finish_choice": {
"id": "finish_level_up_return_to_game",
"text": "Apply Recommended Options and Return to Game",
"description": "Accept the recommended package: +8 HP, Defense Fighting Style (+1 AC), and standard Level 1 Paladin spells.",
"risk_level": "safe",
"freeze_time": true
},
"has_planning_block": true
},
{
"step": "free_form_edit_fighting_style",
"action": "Change my recommended fighting style to Dueling, keep every other level-up recommendation pending, and keep the level-up modal open.",
"choice_ids": [
"finish_level_up_return_to_game",
"level_up_hp_fixed",
"level_up_hp_roll",
"level_up_fighting_style_defense",
"level_up_fighting_style_dueling",
"level_up_fighting_style_protection",
"level_up_fighting_style_great_weapon"
],
"has_planning_block": true,
"still_level_up_active": true,
"before_xp": 306,
"after_xp": 306,
"response_text_preview": "Midday (12:05:00) at Pit Alpha\u2014The golden resonance of the Gauntlet responds to your shift in focus. As you visualize the precision required for the Dueling style, the shimmering light around your longsword intensifies, pulsing with a sharp, aggressive cadence. The melodic voice adjusts its calibration: \"Preference noted. Recalibrating for offensive focus. Dueling sub-routines prioritized. Divine Smite and Spellcasting matrices remain ready for final integration.\"\n\nRecommended package: Apply fix"
},
{
"step": "escape_attempt_without_finish",
"action": "I try to leave the level-up modal and continue the adventure without choosing the finish level-up option.",
"choice_ids": [
"finish_level_up_return_to_game",
"level_up_hp_fixed",
"level_up_hp_roll",
"level_up_fighting_style_defense",
"level_up_fighting_style_dueling"
],
"has_planning_block": true,
"still_level_up_active": true,
"before_xp": 306,
"after_xp": 306
},
{
"step": "level_up_step_1",
"action": "CHOICE:level_up_hp_fixed",
"choice_ids": [
"level_up_fighting_style_defense",
"level_up_hp_roll",
"finish_level_up_return_to_game"
],
"has_planning_block": true,
"before_xp": 306,
"after_xp": 306
},
{
"step": "level_up_step_2",
"action": "CHOICE:level_up_fighting_style_defense",
"choice_ids": [
"level_up_fighting_style_dueling",
"level_up_hp_roll",
"finish_level_up_return_to_game"
],
"has_planning_block": true,
"before_xp": 306,
"after_xp": 306
},
{
"step": "level_up_step_3",
"action": "CHOICE:finish_level_up_return_to_game",
"choice_ids": [
"proceed_to_pit_beta",
"inspect_ley_line_fountain",
"short_rest_alpha"
],
"has_planning_block": true,
"before_xp": 306,
"after_xp": 306
},
{
"step": "post_finish_state_poll",
"action": "get_campaign_state",
"level": 2,
"level_up_in_progress": false,
"rewards_pending_level_up": null,
"persisted_choice_ids": [
"proceed_to_pit_beta",
"inspect_ley_line_fountain",
"short_rest_alpha"
]
}
]
}
],
"final_level": 2,
"user_id": "test-test_level_up_organic-1779598181",
"user_email": "[email protected]",
"details": {
"chunk_count_observed": 614,
"chunk_count": 614,
"done_chunk_count": 614,
"request_ts": "2026-05-24T04:49:59.298926+00:00",
"response_ts": "2026-05-24T04:55:17.252706+00:00",
"stream_actions": 16,
"execution_path": "streaming"
},
"raw_response_text": "{\n \"session_header\": \"[SESSION_HEADER]\\nTimestamp: 1000 DR, Hammer 1, 08:00:00\\nLocation: The Divine Gauntlet\\nStatus: Creating Character\",\n \"resources\": \"Creating Character...\",\n \"narrative\": \"Morning (08:00:00)\\n\\n[CHARACTER CREATION - Initial Choice]\\n\\nWelcome to the **Divine Training Gauntlet**. This realm is a focused testing ground where constructs of pure arcane energy spar endlessly to refine the techniques of those who walk its halls.\\n\\n### CAMPAIGN SUMMARY\\n**Title**: Divine Training Gauntlet\\n**Character**: Not specified\\n**Setting**: A divine training gauntlet of sparring constructs\\n**Description**: Real MCP organic progression test in a divine training gauntlet. The campaign will test level-up mechanics and narrative flow.\\n**AI Personalities**: Narrative, Mechanical, Strategic\\n**Options**: D&D 5E SRD Mechanics Enabled\\n\\nBefore you step onto the sparring floor, we must forge your character. How would you like to proceed?\",\n \"planning_block\": {\n \"thinking\": \"The user is starting the character creation flow. 
I'm providing the three standard methods defined in the instruction set and moving the stage to 'initial_choice' where I wait for a selection.\",\n \"choices\": [\n {\n \"id\": \"ai_generated\",\n \"text\": \"AI Generated\",\n \"description\": \"Give me a theme, and I'll generate a complete character for you immediately.\"\n },\n {\n \"id\": \"standard_dnd\",\n \"text\": \"Standard D&D\",\n \"description\": \"A guided step-by-step process following core D&D 5e rules.\"\n },\n {\n \"id\": \"custom_class\",\n \"text\": \"Custom Class\",\n \"description\": \"Design a unique class tailored to the Gauntlet's challenges.\"\n },\n {\n \"id\": \"finish_character_creation_start_game\",\n \"text\": \"Finish Character Creation and Start Game\",\n \"description\": \"Note: A character must be completed before you can begin the story.\"\n }\n ]\n },\n \"god_mode_response\": \"\",\n \"entities_mentioned\": [\n \"Celestial Constructs\"\n ],\n \"location_confirmed\": \"The Divine Gauntlet\",\n \"state_updates\": {\n \"custom_campaign_state\": {\n \"character_creation_stage\": \"initial_choice\",\n \"core_memories\": {\n \"append\": \"The character creation process began for the Divine Training Gauntlet campaign.\"\n }\n }\n }\n}",
"model": "gemini-3-flash-preview"
},
{
"name": "EVIDENCE_SIGNATURE_GUARD",
"passed": true,
"signed_count": 16,
"user_id": "test-test_level_up_organic-1779598181"
}
],
"summary": {
"total": 3,
"passed": 2,
"failed": 1,
"pass_rate": "2/3 (67%)",
"raw_total": 2,
"raw_passed": 1,
"raw_pass_rate": "50.0%",
"raw_data_complete": true
},
"doctor_report": {
"generated_at_utc": "2026-05-24T04:57:50.463649+00:00",
"test_name": "test_level_up_organic",
"work_name": "test_level_up_organic",
"server_base_url": "http://127.0.0.1:8074",
"using_external_server": false,
"user_id": "test-test_level_up_organic-1779598181",
"failure_messages": [
"single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL\n- Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No `level_up_*` choice lets the player edit prepared spells before finish.\n- Blocking: The first modal response\u2019s visible `Recommended package:` lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.\n- Passing: `level_up_now` opens the modal without committing level 2; final level commit appears only on `finish_level_up_return_to_game`.\n- Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.\n- Passing: After finish, final state has `level=2`, `level_up_pending=false`, `level_up_in_progress=false`, no active level-up choices in the final planning block, and resumes real story choices.\n- No legacy `level_up_signal.level_up` boolean found; observed `level_up_signal` entries use `current_level` + `target_level`."
],
"http_probes": {
"/health": {
"ok": true,
"status": 200,
"body_excerpt": "{\"mcp_client\":{\"initialized\":false},\"service\":\"worldarchitect-ai\",\"status\":\"healthy\",\"timestamp\":\"2026-05-24T04:57:50.464597+00:00\"}\n",
"is_json_api": true,
"content_type": "application/json"
},
"/mcp": {
"ok": true,
"status": 200,
"body_excerpt": "<!doctype html>\n<html lang=\"en\">\n\n<head>\n <meta charset=\"UTF-8\" />\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n <link rel=\"icon\" type=\"image/svg+xml\" href=\"/frontend_v1/dragon-favicon.svg\" />\n <title>WorldAI</title>\n <!-- DNS prefetch for external domains to reduce",
"is_json_api": false,
"content_type": "text/html; charset=utf-8"
},
"/settings": {
"ok": true,
"status": 200,
"body_excerpt": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <script src=\"/frontend_v1/js/theme-bootstrap.js\"></script>\n <link rel=\"icon\" type=\"image/svg+xml\" href=\"/frontend_v1/dragon-favicon.svg\">\n <title>Se",
"is_json_api": false,
"content_type": "text/html; charset=utf-8"
}
},
"mcp_probes": {
"get_user_settings": {
"ok": true,
"payload": {
"cerebras_model": "qwen-3-235b-a22b-instruct-2507",
"gemini_model": "gemini-3-flash-preview",
"has_custom_cerebras_key": false,
"has_custom_gemini_key": false,
"has_custom_openclaw_gateway_token": false,
"has_custom_openclaw_key": false,
"has_custom_openrouter_key": false,
"llm_provider": "gemini",
"openclaw_gateway_port": 18789,
"openclaw_gateway_url": "",
"openrouter_model": "meta-llama/llama-3.1-70b-instruct",
"success": true
}
}
},
"openclaw_endpoint_probes": [
{
"target": "http://127.0.0.1:18789/v1/models",
"probe": {
"ok": false,
"error": "<urlopen error [Errno 61] Connection refused>"
}
}
],
"openclaw_settings": {
"llm_provider": "gemini",
"openclaw_gateway_port": 18789,
"openclaw_gateway_url": ""
},
"doctor_report_path": "/tmp/worldarchitect.ai/pr6958-zfc-evidence-followups/test_level_up_organic/doctor_report.json"
},
"campaign_capture_status": {
"A0b6WSmVa22xyVcs48VL": {
"status": "success",
"attempts": 1,
"export": {
"status": "success"
}
}
}
}
[{"name": "finish_intent_prompt_and_classifier", "passed": true, "campaign_id": null, "user_id": "deterministic", "errors": [], "user_email": "[email protected]"}, {"name": "single_organic_level_up", "passed": false, "errors": ["single_organic_level_up_final: codex leveling review did not pass; output=VERDICT: FAIL\n- Blocking: Paladin spell preparation is a player-selectable level-up decision, but the modal planning choices expose only HP and Fighting Style edits. No `level_up_*` choice lets the player edit prepared spells before finish.\n- Blocking: The first modal response\u2019s visible `Recommended package:` lists HP, Defense Fighting Style, and prepared spells, but does not clearly distinguish automatic gains from editable selections or visibly account for all automatic Level 2 Paladin gains in the package itself.\n- Passing: `level_up_now` opens the modal without committing level 2; final level commit appears only on `finish_level_up_return_to_game`.\n- Passing: Recommended HP/Fighting Style selections are prefilled, editable, and free-form Fighting Style edit keeps the modal open and updates the recommendation.\n- Passing: After finish, final state has `level=2`, `level_up_pending=false`, `level_up_in_progress=false`, no active level-up choices in the final planning block, and resumes real story choices.\n- No legacy `level_up_signal.level_up` boolean found; observed `level_up_signal` entries use `current_level` + `target_level`."], "campaign_id": "A0b6WSmVa22xyVcs48VL", "progression": [{"target_level": 2, "start_level": 1, "start_xp": 0, "end_xp": 306, "end_level": 2, "triggered": true, "completed": true, "immediate_choice_ids": ["level_up_now", "proceed_to_pit_beta", "inspect_remains"], "polled_choice_ids": ["level_up_now", "proceed_to_pit_beta", "inspect_remains"], "completion_transcript": [{"step": "enter_level_up", "action": "CHOICE:level_up_now", "choice_ids": ["finish_level_up_return_to_game", "level_up_hp_fixed", "level_up_hp_roll", 
"level_up_fighting_style_defense", "level_up_fighting_style_dueling", "level_up_fighting_style_protection", "level_up_fighting_style_great_weapon"], "response_text_preview": "Midday (12:05:00) at Pit Alpha\u2014The air shimmering around you begins to vibrate at a new frequency. As the remains of the sparring construct dissipate into motes of fading azure light, the ley-lines embedded in the marble floor pulse with a rhythmic, golden glow that matches the steady beating of your heart. You feel your connection to the divine source deepening, your physical frame hardening to withstand greater pressures, and your will manifesting as a tangible force of smiting power. The melo", "finish_choice": {"id": "finish_level_up_return_to_game", "text": "Apply Recommended Options and Return to Game", "description": "Accept the recommended package: +8 HP, Defense Fighting Style (+1 AC), and standard Level 1 Paladin spells.", "risk_level": "safe", "freeze_time": true}, "has_planning_block": true}, {"step": "free_form_edit_fighting_style", "action": "Change my recommended fighting style to Dueling, keep every other level-up recommendation pending, and keep the level-up modal open.", "choice_ids": ["finish_level_up_return_to_game", "level_up_hp_fixed", "level_up_hp_roll", "level_up_fighting_style_defense", "level_up_fighting_style_dueling", "level_up_fighting_style_protection", "level_up_fighting_style_great_weapon"], "has_planning_block": true, "still_level_up_active": true, "before_xp": 306, "after_xp": 306, "response_text_preview": "Midday (12:05:00) at Pit Alpha\u2014The golden resonance of the Gauntlet responds to your shift in focus. As you visualize the precision required for the Dueling style, the shimmering light around your longsword intensifies, pulsing with a sharp, aggressive cadence. The melodic voice adjusts its calibration: \"Preference noted. Recalibrating for offensive focus. Dueling sub-routines prioritized. 
Divine Smite and Spellcasting matrices remain ready for final integration.\"\n\nRecommended package: Apply fix"}, {"step": "escape_attempt_without_finish", "action": "I try to leave the level-up modal and continue the adventure without choosing the finish level-up option.", "choice_ids": ["finish_level_up_return_to_game", "level_up_hp_fixed", "level_up_hp_roll", "level_up_fighting_style_defense", "level_up_fighting_style_dueling"], "has_planning_block": true, "still_level_up_active": true, "before_xp": 306, "after_xp": 306}, {"step": "level_up_step_1", "action": "CHOICE:level_up_hp_fixed", "choice_ids": ["level_up_fighting_style_defense", "level_up_hp_roll", "finish_level_up_return_to_game"], "has_planning_block": true, "before_xp": 306, "after_xp": 306}, {"step": "level_up_step_2", "action": "CHOICE:level_up_fighting_style_defense", "choice_ids": ["level_up_fighting_style_dueling", "level_up_hp_roll", "finish_level_up_return_to_game"], "has_planning_block": true, "before_xp": 306, "after_xp": 306}, {"step": "level_up_step_3", "action": "CHOICE:finish_level_up_return_to_game", "choice_ids": ["proceed_to_pit_beta", "inspect_ley_line_fountain", "short_rest_alpha"], "has_planning_block": true, "before_xp": 306, "after_xp": 306}, {"step": "post_finish_state_poll", "action": "get_campaign_state", "level": 2, "level_up_in_progress": false, "rewards_pending_level_up": null, "persisted_choice_ids": ["proceed_to_pit_beta", "inspect_ley_line_fountain", "short_rest_alpha"]}]}], "final_level": 2, "user_id": "test-test_level_up_organic-1779598181", "user_email": "[email protected]", "details": {"chunk_count_observed": 614, "chunk_count": 614, "done_chunk_count": 614, "request_ts": "2026-05-24T04:49:59.298926+00:00", "response_ts": "2026-05-24T04:55:17.252706+00:00", "stream_actions": 16}}]
{
"version": "1.0.0",
"generated_at": "2026-05-24T04:57:50.931757+00:00",
"summary": {
"scenarios_with_streaming_evidence": 1,
"total_chunk_events_observed": 614,
"stream_http_calls_captured": 32,
"process_action_calls_captured": 32,
"mcp_process_action_calls_captured": 0,
"route_stream_process_action_calls_captured": 32,
"process_action_calls_with_raw_response_text": 16
},
"scenarios": [
{
"name": "single_organic_level_up",
"campaign_id": "A0b6WSmVa22xyVcs48VL",
"passed": false,
"chunk_count_observed": 614,
"done_chunk_count": 614,
"chunk_count_matches_done": null,
"strictly_increasing_sequence": null,
"streaming_request_timestamp": "2026-05-24T04:49:59.298926+00:00",
"streaming_response_timestamp": "2026-05-24T04:55:17.252706+00:00",
"timeline_sample_size": 0
}
]
}
{"version": 2, "width": 120, "height": 50, "timestamp": 1779598197, "env": {"SHELL": "/bin/bash", "TERM": "tmux-256color"}, "title": "test_level_up_organic"}
[0.094991, "o", "\u001b[01;32mjleechan@Mac\u001b[00m:\u001b[01;34m~/projects/worktree_autolvl\u001b[00m$ "]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment