esz135888 · May 23, 2026 22:34
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/artifact-url-or-pr.md b/artifact-url-or-pr.md
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "job_id": "073eefe3-10c0-438e-b2b7-42a3fbf0e85f",
  "project": "AI 自建專案：公司AI化",
  "learned_at": "2026-05-24T06:40:00+08:00",
  "solution_selection": "eval + system + watchdog",
  "market_context": [
    {
      "source": "OpenAI evaluation best practices",
      "lesson": "Prediction validation should use production, historical, synthetic, and human-curated data, not a single judge score."
    },
    {
      "source": "LangSmith evaluation concepts",
      "lesson": "Evals should support lifecycle measurement, including production monitoring and benchmarking."
    },
    {
      "source": "MLOps patterns",
      "lesson": "AI systems need continuous monitoring, metadata, governance, and feedback loops tied to business KPIs."
    }
  ],
  "pls_next_checks": [
    "Check whether each review writes explicit predictions into a ledger.",
    "Do not trust prediction quality until evidence_coverage and false_positive_rate are tracked.",
    "Require human approval before high-risk prediction verdicts affect people, budget, or project state.",
    "If evidence sync fails twice, dispatch repo_change for backend integration."
  ],
  "assumptions_overturned": [
    "A commit that adds a validation module is not enough; PLS needs a visible eval contract and adoption path.",
    "AI prediction quality cannot be inferred from confidence text; it needs later evidence.",
    "Tool choice conflict should be resolved by schema and acceptance first, vendor second."
  ],
  "next_iteration_condition": "Run 20 prediction validations; if hit_rate >= 70%, false_positive_rate <= 15%, and evidence_coverage >= 80%, promote to backend workflow or agent."
 }
diff --git a/market-maturity.md b/market-maturity.md
diff --git a/prediction-ledger-seed.csv b/prediction-ledger-seed.csv
diff --git a/prediction-verification-eval-console.html b/prediction-verification-eval-console.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>AI 預測驗證 Eval Console</title>
  <!-- Page styles: colour tokens live on :root; the 4-column card grid collapses to one column at widths <= 900px. -->
  <style>
    :root { --ink:#172033; --muted:#617086; --line:#d8dee9; --bg:#f5f7fb; --panel:#fff; --green:#087443; --red:#b42318; --amber:#a15c07; --blue:#175cd3; }
    * { box-sizing:border-box; }
    body { margin:0; background:var(--bg); color:var(--ink); font:14px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif; }
    header { background:var(--panel); border-bottom:1px solid var(--line); padding:28px 32px; }
    h1 { margin:0 0 6px; font-size:26px; letter-spacing:0; }
    h2 { margin:0 0 12px; font-size:17px; }
    main { max-width:1240px; margin:0 auto; padding:22px 18px 42px; display:grid; gap:16px; }
    section { background:var(--panel); border:1px solid var(--line); border-radius:8px; padding:18px; }
    .grid { display:grid; grid-template-columns:repeat(4,minmax(0,1fr)); gap:12px; }
    .card { border:1px solid var(--line); border-radius:8px; background:#fbfcff; padding:14px; min-height:116px; }
    .label { color:var(--muted); font-size:12px; text-transform:uppercase; }
    .value { font-size:24px; font-weight:750; margin-top:4px; }
    .green { color:var(--green); } .red { color:var(--red); } .amber { color:var(--amber); } .blue { color:var(--blue); }
    table { width:100%; border-collapse:collapse; }
    th,td { text-align:left; vertical-align:top; border-bottom:1px solid var(--line); padding:10px 8px; }
    th { color:var(--muted); font-size:12px; }
    code { background:#eef2f7; border-radius:4px; padding:1px 5px; }
    .small { color:var(--muted); font-size:12px; }
    .pill { display:inline-block; border:1px solid var(--line); border-radius:999px; padding:2px 9px; background:#fff; }
    @media (max-width:900px){ header{padding:22px 18px;} .grid{grid-template-columns:1fr;} }
  </style>
 </head>
 <body>
  <!-- Page banner: console title plus the job id, role assignments and due date. -->
  <header>
    <h1>AI 預測驗證 Eval Console</h1>
    <div class="small">Job 073eefe3-10c0-438e-b2b7-42a3fbf0e85f · owner Louis · governance zihrou · implementation iron · due 2026-05-30</div>
  </header>
  <main>
    <!-- Status cards for the current validation round. -->
    <section>
      <h2>本輪驗證狀態</h2>
      <div class="grid">
        <div class="card"><div class="label">Solution Type</div><div class="value blue">eval + system</div><div class="small">加 watchdog 閾值，不直接升 agent。</div></div>
        <div class="card"><div class="label">Seed Ledger</div><div class="value green">3 rows</div><div class="small">先跑 20 筆成為 golden set。</div></div>
        <div class="card"><div class="label">Risk Guard</div><div class="value amber">human review</div><div class="small">高風險 verdict 不自動改狀態。</div></div>
        <div class="card"><div class="label">Next Gate</div><div class="value red">needs data</div><div class="small">需要 evidence coverage 與命中率。</div></div>
      </div>
    </section>
    <!-- Scoring rules: metric, pass threshold, and the follow-up action. -->
    <section>
      <h2>評分規則</h2>
      <table>
        <thead>
          <tr><th scope="col">Metric</th><th scope="col">Pass</th><th scope="col">Action</th></tr>
        </thead>
        <tbody>
          <tr><td><code>evidence_coverage</code></td><td>&gt;= 80%</td><td>低於門檻時要求 iron 補 source sync。</td></tr>
          <tr><td><code>hit_rate</code></td><td>&gt;= 70%</td><td>低於 60% alert Louis 暫停自動化擴權。</td></tr>
          <tr><td><code>false_positive_rate</code></td><td>&lt;= 15%</td><td>超標時重修 rubric/golden set。</td></tr>
          <tr><td><code>time_to_validation</code></td><td>&lt;= 7 days</td><td>逾期 prediction 進 watchdog。</td></tr>
        </tbody>
      </table>
    </section>
    <!-- Data flow: each evidence source, what it is used for, and its join key. -->
    <section>
      <h2>資料流</h2>
      <table>
        <thead>
          <tr><th scope="col">來源</th><th scope="col">用途</th><th scope="col">Join Key</th></tr>
        </thead>
        <tbody>
          <tr><td>signals</td><td>找預測後的實際事件</td><td>project_id + signal_type + semantic match</td></tr>
          <tr><td>action_items</td><td>驗證 owner 是否被派工或完成</td><td>project_id + assignee + due_date</td></tr>
          <tr><td>github_commit</td><td>驗證技術預測是否落地</td><td>project_id + commit summary</td></tr>
          <tr><td>deliverables</td><td>驗證 AI 是否產出 real files</td><td>hermes_job_id + deliverable_id</td></tr>
          <tr><td>people_reflections</td><td>驗證人設/近期關注是否更新</td><td>profile_id + project_id</td></tr>
        </tbody>
      </table>
    </section>
    <!-- Watchdog alerts: trigger condition, who is notified, and the response. -->
    <section>
      <h2>Watchdog Alert</h2>
      <table>
        <thead>
          <tr><th scope="col">條件</th><th scope="col">通知對象</th><th scope="col">處理</th></tr>
        </thead>
        <tbody>
          <tr><td>20 筆後 hit_rate &lt; 60%</td><td>Louis</td><td>暫停 AI prediction 擴權，重修 rubric。</td></tr>
          <tr><td>high risk verdict 無人工審核</td><td>zihrou</td><td>補 approval matrix。</td></tr>
          <tr><td>evidence sync error &gt; 5%</td><td>iron</td><td>派 repo_change 修 source sync。</td></tr>
        </tbody>
      </table>
    </section>
  </main>
 </body>
 </html>
diff --git a/production-acceptance.md b/production-acceptance.md
diff --git a/production-brief.md b/production-brief.md
diff --git a/skill-usage.md b/skill-usage.md
diff --git a/solution-selection.md b/solution-selection.md
diff --git a/sources.md b/sources.md
column	type	required	note
`prediction_id`	uuid	yes	prediction key
`review_id`	uuid	yes	source review
`project_id`	uuid	yes	PLS project
`predicted_at`	timestamptz	yes	created time
`prediction_text`	text	yes	what AI predicted
`expected_signal_type`	text	yes	action_completed/github_commit/status_change/message
`expected_by`	date	yes	validation deadline
`risk_tier`	enum	yes	low/medium/high
`owner_profile_id`	uuid	no	accountable person
`status`	enum	yes	pending/hit/miss/partial/expired
column	type	required	note
`evidence_id`	uuid	yes	evidence key
`prediction_id`	uuid	yes	linked prediction
`source_type`	enum	yes	signal/action_item/github_commit/deliverable/person_reflection
`source_id`	uuid/text	yes	source row id
`event_time`	timestamptz	yes	evidence time
`match_strength`	numeric	yes	0-1
`match_reason`	text	yes	why linked
column	type	required	note
`score_id`	uuid	yes	score key
`prediction_id`	uuid	yes	linked prediction
`hit_score`	numeric	yes	0-100
`verdict`	enum	yes	hit/partial/miss/needs_human_review
`false_positive_risk`	numeric	yes	0-1
`validated_at`	timestamptz	yes	score time
`validated_by`	text	yes	worker/model/human
	{
	"job_id": "073eefe3-10c0-438e-b2b7-42a3fbf0e85f",
	"project": "AI 自建專案：公司AI化",
	"learned_at": "2026-05-24T06:40:00+08:00",
	"solution_selection": "eval + system + watchdog",
	"market_context": [
	{
	"source": "OpenAI evaluation best practices",
	"lesson": "Prediction validation should use production, historical, synthetic, and human-curated data, not a single judge score."
	},
	{
	"source": "LangSmith evaluation concepts",
	"lesson": "Evals should support lifecycle measurement, including production monitoring and benchmarking."
	},
	{
	"source": "MLOps patterns",
	"lesson": "AI systems need continuous monitoring, metadata, governance, and feedback loops tied to business KPIs."
	}
	],
	"pls_next_checks": [
	"Check whether each review writes explicit predictions into a ledger.",
	"Do not trust prediction quality until evidence_coverage and false_positive_rate are tracked.",
	"Require human approval before high-risk prediction verdicts affect people, budget, or project state.",
	"If evidence sync fails twice, dispatch repo_change for backend integration."
	],
	"assumptions_overturned": [
	"A commit that adds a validation module is not enough; PLS needs a visible eval contract and adoption path.",
	"AI prediction quality cannot be inferred from confidence text; it needs later evidence.",
	"Tool choice conflict should be resolved by schema and acceptance first, vendor second."
	],
	"next_iteration_condition": "Run 20 prediction validations; if hit_rate >= 70%, false_positive_rate <= 15%, and evidence_coverage >= 80%, promote to backend workflow or agent."
	}
prediction_id	review_id	project_id	predicted_at	prediction_text	expected_signal_type	expected_by	risk_tier	owner	status
PRED-001	REV-20260523-001	d2afbba2-f20a-4ca5-ab6b-8e848e5532ef	2026-05-23T14:50:36Z	新增 AI 預測驗證模組會產生後續 evidence sync 需求	github_commit	2026-05-30	medium	iron	pending
PRED-002	REV-20260523-002	d2afbba2-f20a-4ca5-ab6b-8e848e5532ef	2026-05-23T14:50:36Z	zihrou 需要定義高風險 prediction 的人工審核邊界	action_item	2026-05-30	high	zihrou	pending
PRED-003	REV-20260523-003	d2afbba2-f20a-4ca5-ab6b-8e848e5532ef	2026-05-23T14:50:36Z	Louis 會以 7 天內是否有可驗證成果判斷是否加碼 AI 管理層	message_or_decision	2026-05-30	high	Louis	pending
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<title>AI 預測驗證 Eval Console</title>
	<!-- Page styles: colour tokens live on :root; the 4-column card grid collapses to one column at widths <= 900px. -->
	<style>
	:root { --ink:#172033; --muted:#617086; --line:#d8dee9; --bg:#f5f7fb; --panel:#fff; --green:#087443; --red:#b42318; --amber:#a15c07; --blue:#175cd3; }
	* { box-sizing:border-box; }
	body { margin:0; background:var(--bg); color:var(--ink); font:14px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif; }
	header { background:var(--panel); border-bottom:1px solid var(--line); padding:28px 32px; }
	h1 { margin:0 0 6px; font-size:26px; letter-spacing:0; }
	h2 { margin:0 0 12px; font-size:17px; }
	main { max-width:1240px; margin:0 auto; padding:22px 18px 42px; display:grid; gap:16px; }
	section { background:var(--panel); border:1px solid var(--line); border-radius:8px; padding:18px; }
	.grid { display:grid; grid-template-columns:repeat(4,minmax(0,1fr)); gap:12px; }
	.card { border:1px solid var(--line); border-radius:8px; background:#fbfcff; padding:14px; min-height:116px; }
	.label { color:var(--muted); font-size:12px; text-transform:uppercase; }
	.value { font-size:24px; font-weight:750; margin-top:4px; }
	.green { color:var(--green); } .red { color:var(--red); } .amber { color:var(--amber); } .blue { color:var(--blue); }
	table { width:100%; border-collapse:collapse; }
	th,td { text-align:left; vertical-align:top; border-bottom:1px solid var(--line); padding:10px 8px; }
	th { color:var(--muted); font-size:12px; }
	code { background:#eef2f7; border-radius:4px; padding:1px 5px; }
	.small { color:var(--muted); font-size:12px; }
	.pill { display:inline-block; border:1px solid var(--line); border-radius:999px; padding:2px 9px; background:#fff; }
	@media (max-width:900px){ header{padding:22px 18px;} .grid{grid-template-columns:1fr;} }
	</style>
	</head>
	<body>
	<!-- Page banner: console title plus the job id, role assignments and due date. -->
	<header>
	<h1>AI 預測驗證 Eval Console</h1>
	<div class="small">Job 073eefe3-10c0-438e-b2b7-42a3fbf0e85f · owner Louis · governance zihrou · implementation iron · due 2026-05-30</div>
	</header>
	<main>
	<!-- Status cards for the current validation round. -->
	<section>
	<h2>本輪驗證狀態</h2>
	<div class="grid">
	<div class="card"><div class="label">Solution Type</div><div class="value blue">eval + system</div><div class="small">加 watchdog 閾值，不直接升 agent。</div></div>
	<div class="card"><div class="label">Seed Ledger</div><div class="value green">3 rows</div><div class="small">先跑 20 筆成為 golden set。</div></div>
	<div class="card"><div class="label">Risk Guard</div><div class="value amber">human review</div><div class="small">高風險 verdict 不自動改狀態。</div></div>
	<div class="card"><div class="label">Next Gate</div><div class="value red">needs data</div><div class="small">需要 evidence coverage 與命中率。</div></div>
	</div>
	</section>
	<!-- Scoring rules: metric, pass threshold, and the follow-up action. -->
	<section>
	<h2>評分規則</h2>
	<table>
	<thead>
	<tr><th scope="col">Metric</th><th scope="col">Pass</th><th scope="col">Action</th></tr>
	</thead>
	<tbody>
	<!-- Comparison operators in cell text are written as entities so a raw "<" is never parsed as the start of a tag. -->
	<tr><td><code>evidence_coverage</code></td><td>&gt;= 80%</td><td>低於門檻時要求 iron 補 source sync。</td></tr>
	<tr><td><code>hit_rate</code></td><td>&gt;= 70%</td><td>低於 60% alert Louis 暫停自動化擴權。</td></tr>
	<tr><td><code>false_positive_rate</code></td><td>&lt;= 15%</td><td>超標時重修 rubric/golden set。</td></tr>
	<tr><td><code>time_to_validation</code></td><td>&lt;= 7 days</td><td>逾期 prediction 進 watchdog。</td></tr>
	</tbody>
	</table>
	</section>
	<!-- Data flow: each evidence source, what it is used for, and its join key. -->
	<section>
	<h2>資料流</h2>
	<table>
	<thead>
	<tr><th scope="col">來源</th><th scope="col">用途</th><th scope="col">Join Key</th></tr>
	</thead>
	<tbody>
	<tr><td>signals</td><td>找預測後的實際事件</td><td>project_id + signal_type + semantic match</td></tr>
	<tr><td>action_items</td><td>驗證 owner 是否被派工或完成</td><td>project_id + assignee + due_date</td></tr>
	<tr><td>github_commit</td><td>驗證技術預測是否落地</td><td>project_id + commit summary</td></tr>
	<tr><td>deliverables</td><td>驗證 AI 是否產出 real files</td><td>hermes_job_id + deliverable_id</td></tr>
	<tr><td>people_reflections</td><td>驗證人設/近期關注是否更新</td><td>profile_id + project_id</td></tr>
	</tbody>
	</table>
	</section>
	<!-- Watchdog alerts: trigger condition, who is notified, and the response. -->
	<section>
	<h2>Watchdog Alert</h2>
	<table>
	<thead>
	<tr><th scope="col">條件</th><th scope="col">通知對象</th><th scope="col">處理</th></tr>
	</thead>
	<tbody>
	<tr><td>20 筆後 hit_rate &lt; 60%</td><td>Louis</td><td>暫停 AI prediction 擴權，重修 rubric。</td></tr>
	<tr><td>high risk verdict 無人工審核</td><td>zihrou</td><td>補 approval matrix。</td></tr>
	<tr><td>evidence sync error &gt; 5%</td><td>iron</td><td>派 repo_change 修 source sync。</td></tr>
	</tbody>
	</table>
	</section>
	</main>
	</body>
	</html>