esz135888 · May 23, 2026 18:34
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "job_id": "3d2d136a-e3ee-4eb1-b1a7-573f20cac9d9",
  "project_id": "d2afbba2-f20a-4ca5-ab6b-8e848e5532ef",
  "ai_native_project_id": "a8befe83-b818-482a-a6bb-3df58f50c3a2",
  "learned_at": "2026-05-24T02:38:00+08:00",
  "signal": {
    "type": "github_commit",
    "summary": "新增 AI 預測驗證模組，透過 signals、action items 等多來源證據，自動核對上次 review 的預測是否命中",
    "root_cause_hypothesis": "公司 AI 化的下一個瓶頸不是沒有 AI review，而是 review 預測缺少 outcome label 與 evidence trail，導致工具選擇與系統化建置分歧無法快速校準。"
  },
  "recommended_next_check": [
    "prediction_claims 是否已有 10 筆 D1 seed",
    "evidence_events 是否接入 signals/action_items/GitHub commit",
    "unknown_rate 是否低於 25%",
    "miss_reason 是否能派 correction task",
    "Louis 是否接受 hit/miss/partial/unknown 定義"
  ],
  "do_not_repeat": [
    "不要只做 AI review 摘要",
    "不要只看 action item 逾期就判 prediction miss",
    "不要在缺 source_ref 時自動定 final label",
    "不要把 restricted evidence payload 暴露給不該讀的人"
  ],
  "next_worker_rules": {
    "if_label_policy_accepted": "seed_10_predictions_and_run_D7_batch_match",
    "if_unknown_rate_above_25_percent": "dispatch_data_source_gap_task",
    "if_false_positive_above_10_percent": "disable_semantic_auto_label_and_require_human_review",
    "if_tool_fragmentation_is_top_miss_reason": "dispatch_louis_tool_standard_decision_task",
    "if_execution_gap_is_top_miss_reason": "dispatch_owner_due_correction_task"
  },
  "market_context": {
    "checked_date": "2026-05-24",
    "pattern": "Production AI verification should combine observability traces, source correlation IDs, drift/quality monitoring, outcome labels, and human review queues."
  },
  "production_readiness": {
    "primary_artifact": "prediction-verification-cockpit.html",
    "e2e_verification": "Validate JSON, required keywords, uploaded files, durable Gist URL.",
    "required_artifacts": [
      "production-brief.md",
      "data-model.md",
      "acceptance-tests.md",
      "decision-record.md",
      "artifact-url-or-pr.md"
    ]
  }
 }
diff --git a/prediction-verification-cockpit.html b/prediction-verification-cockpit.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>AI 預測驗證 Cockpit</title>
  <style>
    :root {
      --ink: #111827;
      --muted: #5f6b7a;
      --line: #d7dee8;
      --bg: #f7f9fb;
      --panel: #ffffff;
      --green: #0f766e;
      --blue: #1d4ed8;
      --red: #b42318;
      --amber: #a15c00;
      --violet: #6d28d9;
    }
    * { box-sizing: border-box; }
    body {
      margin: 0;
      font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      background: var(--bg);
      color: var(--ink);
      line-height: 1.5;
    }
    header {
      padding: 30px clamp(18px, 4vw, 50px);
      color: white;
      background: linear-gradient(120deg, #111827, #233047);
    }
    h1, h2, h3 { margin: 0; line-height: 1.2; }
    h1 { font-size: clamp(29px, 5vw, 48px); max-width: 1040px; }
    h2 { font-size: 22px; margin-bottom: 12px; }
    h3 { font-size: 16px; margin-bottom: 7px; }
    header p { max-width: 1020px; color: #d5dce8; margin: 10px 0 0; }
    main { padding: 24px clamp(16px, 4vw, 50px) 46px; }
    .grid { display: grid; grid-template-columns: repeat(12, 1fr); gap: 14px; margin-bottom: 22px; }
    .card {
      background: var(--panel);
      border: 1px solid var(--line);
      border-radius: 8px;
      padding: 16px;
      box-shadow: 0 1px 2px rgba(17, 24, 39, 0.04);
    }
    .span-3 { grid-column: span 3; }
    .span-4 { grid-column: span 4; }
    .span-6 { grid-column: span 6; }
    .span-8 { grid-column: span 8; }
    .span-12 { grid-column: span 12; }
    .metric { font-size: 32px; font-weight: 760; margin: 5px 0 2px; }
    .muted { color: var(--muted); }
    .tag {
      display: inline-flex;
      align-items: center;
      min-height: 24px;
      padding: 2px 8px;
      border-radius: 999px;
      font-size: 12px;
      font-weight: 720;
      border: 1px solid currentColor;
      margin: 2px 4px 2px 0;
    }
    .green { color: var(--green); }
    .blue { color: var(--blue); }
    .red { color: var(--red); }
    .amber { color: var(--amber); }
    .violet { color: var(--violet); }
    table { width: 100%; border-collapse: collapse; }
    th, td { text-align: left; padding: 10px 8px; border-bottom: 1px solid var(--line); vertical-align: top; }
    th { font-size: 12px; color: var(--muted); text-transform: uppercase; letter-spacing: 0; }
    ul, ol { margin: 8px 0 0 18px; padding: 0; }
    li { margin: 5px 0; }
    .flow {
      display: grid;
      grid-template-columns: repeat(5, minmax(130px, 1fr));
      gap: 10px;
    }
    .flow div {
      border: 1px solid var(--line);
      border-radius: 8px;
      padding: 12px;
      background: #fbfdff;
      min-height: 118px;
    }
    .code {
      font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
      font-size: 13px;
      background: #edf2f7;
      border-radius: 6px;
      padding: 2px 5px;
    }
    .line-draft {
      white-space: pre-wrap;
      background: #eff6ff;
      border: 1px solid #bfdbfe;
      border-radius: 8px;
      padding: 14px;
    }
    @media (max-width: 960px) {
      .span-3, .span-4, .span-6, .span-8 { grid-column: span 12; }
      .flow { grid-template-columns: 1fr; }
    }
  </style>
 </head>
 <body>
  <header>
    <h1>AI 預測驗證 Cockpit</h1>
    <p>把「上次 review 的預測」變成可被 signals、action items、GitHub、LINE/Drive 證據自動核對的 production 模組。目標不是多做一張報表，而是讓 Louis / zihrou / iron 對 AI 工具選擇與系統化建置有同一套命中率、證據等級、退修與派工節奏。</p>
  </header>
  <main>
    <section class="grid">
      <div class="card span-3">
        <h3>Verification Readiness</h3>
        <div class="metric green">R1</div>
        <p class="muted">可用 schema 跑第一批人工標註與自動比對。</p>
      </div>
      <div class="card span-3">
        <h3>Evidence Sources</h3>
        <div class="metric blue">5</div>
        <p class="muted">signals、action items、commit、deliverable、people reflection。</p>
      </div>
      <div class="card span-3">
        <h3>Owner / Due</h3>
        <div class="metric">Louis</div>
        <p class="muted">2026-05-27 前拍板命中定義與紅線。</p>
      </div>
      <div class="card span-3">
        <h3>Decision Risk</h3>
        <div class="metric amber">中</div>
        <p class="muted">zihrou、iron、Louis 工具選擇尚未完全統一。</p>
      </div>
    </section>

    <section class="grid">
      <div class="card span-8">
        <h2>D1 / D7 / D14 / D30 路徑</h2>
        <table>
          <thead><tr><th>節點</th><th>交付</th><th>驗收訊號</th></tr></thead>
          <tbody>
            <tr><td>D1</td><td>建立 prediction ledger 與 evidence matcher，先人工回填 10 筆 review prediction。</td><td>每筆有 claim、time window、expected signal、evidence source、label。</td></tr>
            <tr><td>D7</td><td>接入 signals/action_items/GitHub commit，自動產生 hit/miss/partial/unknown。</td><td>50 筆 prediction 有驗證結果，unknown 低於 25%。</td></tr>
            <tr><td>D14</td><td>將驗證結果接入 AI review 節奏，低命中類型自動派 correction task。</td><td>每週 review 能看到 calibration、false positive、false negative。</td></tr>
            <tr><td>D30</td><td>形成公司 AI 化 decision QA：預測、證據、派工、修正、治理完整閉環。</td><td>AI review 命中率、工具採用決策速度、逾期修正率每週可追。</td></tr>
          </tbody>
        </table>
      </div>
      <div class="card span-4">
        <h2>Purpose-to-Purpose E2E</h2>
        <ol>
          <li>原始目的：讓 AI review 真的改善公司 AI 化決策。</li>
          <li>產出物：驗證 cockpit、資料模型、比對規則、驗收測試。</li>
          <li>人採用：Louis 拍板紅線，zihrou/iron 回饋工具分歧訊號。</li>
          <li>指標改善：預測命中率、決策速度、派工準確率、風險提早發現率。</li>
        </ol>
      </div>
    </section>

    <section class="grid">
      <div class="card span-12">
        <h2>Verification Flow</h2>
        <div class="flow">
          <div><h3>1. Capture</h3><p>把 review 中的預測拆成 <span class="code">prediction_claims</span>，含 expected outcome 與時間窗。</p></div>
          <div><h3>2. Evidence</h3><p>收 signals、action items、commit、deliverable、people reflection，多來源都保留 source_ref。</p></div>
          <div><h3>3. Match</h3><p>用 rule + embedding/keyword candidate 產生 evidence_candidates 與 confidence。</p></div>
          <div><h3>4. Label</h3><p>輸出 hit / miss / partial / unknown，低信心進人工 review queue。</p></div>
          <div><h3>5. Act</h3><p>命中差的 prediction type 轉成 prompt 改寫、資料缺口或 owner 對焦任務。</p></div>
        </div>
      </div>
    </section>

    <section class="grid">
      <div class="card span-6">
        <h2>Evidence Ledger 範例</h2>
        <table>
          <thead><tr><th>Prediction</th><th>Evidence</th><th>Label</th><th>Action</th></tr></thead>
          <tbody>
            <tr><td>工具選擇未統一會拖慢系統化建置 2 週</td><td>action item 仍要求統一 AI 工具與同步節奏</td><td><span class="tag amber">partial</span></td><td>派 Louis 拍板工具標準</td></tr>
            <tr><td>AI 預測驗證模組會成為近期關注</td><td>GitHub commit 與 person_reflection 均指向 verification module</td><td><span class="tag green">hit</span></td><td>進入 D7 自動比對</td></tr>
            <tr><td>所有 action item 逾期皆因缺 owner</td><td>context 顯示部分 action 已有 owner，但缺 review cadence</td><td><span class="tag red">miss</span></td><td>修正 prompt：區分 owner 缺口與節奏缺口</td></tr>
          </tbody>
        </table>
      </div>
      <div class="card span-6">
        <h2>Human Capability Upgrade</h2>
        <ul>
          <li>Louis：看 AI review 的校準度，不只看敘述漂亮。</li>
          <li>zihrou：把分歧翻譯成方向、資源、授權三類缺口。</li>
          <li>iron：用 evidence ledger 對齊多線執行，而不是被催促。</li>
          <li>PLS：下一輪可根據 miss type 自動改 prompt、派資料任務或升級治理。</li>
        </ul>
      </div>
    </section>

    <section class="grid">
      <div class="card span-6">
        <h2>people_sync</h2>
        <div class="line-draft">LINE 草稿：
 Louis / zihrou / iron，PLS 已把「AI 預測驗證模組」整理成可落地驗證 cockpit。請在 2026-05-27 前回覆：
 1. accept：同意 hit/miss/partial/unknown 定義；
 2. revise：指出哪種預測不能這樣判；
 3. evidence_gap：補 signals/action items/commit 以外還要接的來源。
 驗收口徑：D7 前 50 筆 prediction 可自動產生 label，unknown 低於 25%，低信心進人工 review queue。</div>
      </div>
      <div class="card span-6">
        <h2>production_readiness</h2>
        <table>
          <thead><tr><th>Gate</th><th>狀態</th><th>證據</th></tr></thead>
          <tbody>
            <tr><td>主成果可打開</td><td><span class="tag green">PASS</span></td><td>本 HTML cockpit</td></tr>
            <tr><td>資料模型</td><td><span class="tag green">PASS</span></td><td>prediction/evidence/match/review/calibration tables</td></tr>
            <tr><td>權限/稽核</td><td><span class="tag green">PASS</span></td><td>source_ref、audit_actor、sensitive_level</td></tr>
            <tr><td>E2E 驗收</td><td><span class="tag blue">READY</span></td><td>D1/D7/D14/D30 gates</td></tr>
            <tr><td>下一輪升級</td><td><span class="tag violet">DEFINED</span></td><td>低命中 prediction type 自動派工</td></tr>
          </tbody>
        </table>
      </div>
    </section>
  </main>
 </body>
 </html>
diff --git a/production-brief.md b/production-brief.md
diff --git a/sources.md b/sources.md
欄位	型別	說明
id	uuid	prediction id
project_id	uuid	PLS project id
review_id	uuid	上次 AI review id
claim_text	text	可驗證預測句
prediction_type	enum	schedule, owner, risk, adoption, revenue, quality, tool_choice
expected_signal	text	預期會出現的證據
time_window_start	timestamptz	驗證開始
time_window_end	timestamptz	驗證結束
owner_member_id	uuid	預測關聯 owner
confidence_before	numeric	review 當下信心
status	enum	open, matched, reviewed, closed, retired
created_at	timestamptz	建立時間
欄位	型別	說明
id	uuid	evidence id
project_id	uuid	PLS project id
source_type	enum	signal, action_item, github_commit, deliverable, line_note, drive_doc, person_reflection
source_ref	text	URL / PLS id / commit sha
event_time	timestamptz	事件時間
actor_member_id	uuid	行動者
event_summary	text	摘要
payload	jsonb	原始資料
sensitive_level	enum	public, internal, restricted
audit_actor	uuid	匯入者
audit_at	timestamptz	匯入時間
欄位	型別	說明
id	uuid	match id
prediction_claim_id	uuid	prediction
evidence_event_id	uuid	evidence
match_method	enum	exact_ref, keyword, semantic, rule, human
match_score	numeric	0-1
label_candidate	enum	hit, miss, partial, unknown
rationale	text	比對理由
created_at	timestamptz	建立時間
欄位	型別	說明
id	uuid	review id
prediction_claim_id	uuid	prediction
final_label	enum	hit, miss, partial, unknown
miss_reason	enum	no_evidence, wrong_owner, wrong_timing, wrong_assumption, tool_fragmentation, data_gap, execution_gap
reviewer_member_id	uuid	Louis/zihrou/iron/PLS reviewer（注意：權限表規定 iron 不可改 final label，iron 擔任 reviewer 時 final_label 須由 Louis 確認）
review_note	text	判斷說明
correction_task_id	uuid	若 miss/partial 產生的 action item
reviewed_at	timestamptz	review 時間
欄位	型別	說明
id	uuid	snapshot id
project_id	uuid	PLS project id
period_start	date	週期開始
period_end	date	週期結束
prediction_count	int	總數
hit_rate	numeric	命中率
partial_rate	numeric	部分命中率
unknown_rate	numeric	未知率
false_positive_count	int	假陽性
false_negative_count	int	假陰性
top_miss_reasons	jsonb	前三大 miss reason
published_at	timestamptz	發布時間
角色	可讀	可寫	不可做
Louis	全部 calibration、prediction、evidence summary	hit/miss policy、final review	修改原始 evidence payload
zihrou	prediction、miss_reason、需協調任務	review note、缺口分類	讀 restricted 原始來源全文
iron	與自己任務/commit 相關 evidence	action correction 回覆	改 final label
PLS worker	必要 schema、source_ref、摘要	match candidate、progress、artifact	假裝 GitHub/LINE/部署成功
選項	說明	優點	風險
A. 只做 dashboard	顯示近期 signals/action item 狀態。	快速可視化。	無法回答上次預測是否命中。
B. 只做 prompt 檢討	由 AI 自評哪些預測準。	成本低。	容易自我合理化，缺 evidence trail。
C. 建 prediction verification ledger	prediction claim + evidence + match + label + correction task。	可稽核、可校準、可派工，能接 PLS 多來源資料。	初期需要人工標註 10-50 筆建立標準。
	{
	"job_id": "3d2d136a-e3ee-4eb1-b1a7-573f20cac9d9",
	"project_id": "d2afbba2-f20a-4ca5-ab6b-8e848e5532ef",
	"ai_native_project_id": "a8befe83-b818-482a-a6bb-3df58f50c3a2",
	"learned_at": "2026-05-24T02:38:00+08:00",
	"signal": {
	"type": "github_commit",
	"summary": "新增 AI 預測驗證模組，透過 signals、action items 等多來源證據，自動核對上次 review 的預測是否命中",
	"root_cause_hypothesis": "公司 AI 化的下一個瓶頸不是沒有 AI review，而是 review 預測缺少 outcome label 與 evidence trail，導致工具選擇與系統化建置分歧無法快速校準。"
	},
	"recommended_next_check": [
	"prediction_claims 是否已有 10 筆 D1 seed",
	"evidence_events 是否接入 signals/action_items/GitHub commit",
	"unknown_rate 是否低於 25%",
	"miss_reason 是否能派 correction task",
	"Louis 是否接受 hit/miss/partial/unknown 定義"
	],
	"do_not_repeat": [
	"不要只做 AI review 摘要",
	"不要只看 action item 逾期就判 prediction miss",
	"不要在缺 source_ref 時自動定 final label",
	"不要把 restricted evidence payload 暴露給不該讀的人"
	],
	"next_worker_rules": {
	"if_label_policy_accepted": "seed_10_predictions_and_run_D7_batch_match",
	"if_unknown_rate_above_25_percent": "dispatch_data_source_gap_task",
	"if_false_positive_above_10_percent": "disable_semantic_auto_label_and_require_human_review",
	"if_tool_fragmentation_is_top_miss_reason": "dispatch_louis_tool_standard_decision_task",
	"if_execution_gap_is_top_miss_reason": "dispatch_owner_due_correction_task"
	},
	"market_context": {
	"checked_date": "2026-05-24",
	"pattern": "Production AI verification should combine observability traces, source correlation IDs, drift/quality monitoring, outcome labels, and human review queues."
	},
	"production_readiness": {
	"primary_artifact": "prediction-verification-cockpit.html",
	"e2e_verification": "Validate JSON, required keywords, uploaded files, durable Gist URL.",
	"required_artifacts": [
	"production-brief.md",
	"data-model.md",
	"acceptance-tests.md",
	"decision-record.md",
	"artifact-url-or-pr.md"
	]
	}
	}
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<title>AI 預測驗證 Cockpit</title>
	<style>
	:root {
	--ink: #111827;
	--muted: #5f6b7a;
	--line: #d7dee8;
	--bg: #f7f9fb;
	--panel: #ffffff;
	--green: #0f766e;
	--blue: #1d4ed8;
	--red: #b42318;
	--amber: #a15c00;
	--violet: #6d28d9;
	}
	* { box-sizing: border-box; }
	body {
	margin: 0;
	font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
	background: var(--bg);
	color: var(--ink);
	line-height: 1.5;
	}
	header {
	padding: 30px clamp(18px, 4vw, 50px);
	color: white;
	background: linear-gradient(120deg, #111827, #233047);
	}
	h1, h2, h3 { margin: 0; line-height: 1.2; }
	h1 { font-size: clamp(29px, 5vw, 48px); max-width: 1040px; }
	h2 { font-size: 22px; margin-bottom: 12px; }
	h3 { font-size: 16px; margin-bottom: 7px; }
	header p { max-width: 1020px; color: #d5dce8; margin: 10px 0 0; }
	main { padding: 24px clamp(16px, 4vw, 50px) 46px; }
	.grid { display: grid; grid-template-columns: repeat(12, 1fr); gap: 14px; margin-bottom: 22px; }
	.card {
	background: var(--panel);
	border: 1px solid var(--line);
	border-radius: 8px;
	padding: 16px;
	box-shadow: 0 1px 2px rgba(17, 24, 39, 0.04);
	}
	.span-3 { grid-column: span 3; }
	.span-4 { grid-column: span 4; }
	.span-6 { grid-column: span 6; }
	.span-8 { grid-column: span 8; }
	.span-12 { grid-column: span 12; }
	.metric { font-size: 32px; font-weight: 760; margin: 5px 0 2px; }
	.muted { color: var(--muted); }
	.tag {
	display: inline-flex;
	align-items: center;
	min-height: 24px;
	padding: 2px 8px;
	border-radius: 999px;
	font-size: 12px;
	font-weight: 720;
	border: 1px solid currentColor;
	margin: 2px 4px 2px 0;
	}
	.green { color: var(--green); }
	.blue { color: var(--blue); }
	.red { color: var(--red); }
	.amber { color: var(--amber); }
	.violet { color: var(--violet); }
	table { width: 100%; border-collapse: collapse; }
	th, td { text-align: left; padding: 10px 8px; border-bottom: 1px solid var(--line); vertical-align: top; }
	th { font-size: 12px; color: var(--muted); text-transform: uppercase; letter-spacing: 0; }
	ul, ol { margin: 8px 0 0 18px; padding: 0; }
	li { margin: 5px 0; }
	.flow {
	display: grid;
	grid-template-columns: repeat(5, minmax(130px, 1fr));
	gap: 10px;
	}
	.flow div {
	border: 1px solid var(--line);
	border-radius: 8px;
	padding: 12px;
	background: #fbfdff;
	min-height: 118px;
	}
	.code {
	font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
	font-size: 13px;
	background: #edf2f7;
	border-radius: 6px;
	padding: 2px 5px;
	}
	.line-draft {
	white-space: pre-wrap;
	background: #eff6ff;
	border: 1px solid #bfdbfe;
	border-radius: 8px;
	padding: 14px;
	}
	@media (max-width: 960px) {
	.span-3, .span-4, .span-6, .span-8 { grid-column: span 12; }
	.flow { grid-template-columns: 1fr; }
	}
	</style>
	</head>
	<body>
	<header>
	<h1>AI 預測驗證 Cockpit</h1>
	<p>把「上次 review 的預測」變成可被 signals、action items、GitHub、LINE/Drive 證據自動核對的 production 模組。目標不是多做一張報表，而是讓 Louis / zihrou / iron 對 AI 工具選擇與系統化建置有同一套命中率、證據等級、退修與派工節奏。</p>
	</header>
	<main>
	<section class="grid">
	<div class="card span-3">
	<h3>Verification Readiness</h3>
	<div class="metric green">R1</div>
	<p class="muted">可用 schema 跑第一批人工標註與自動比對。</p>
	</div>
	<div class="card span-3">
	<h3>Evidence Sources</h3>
	<div class="metric blue">5</div>
	<p class="muted">signals、action items、commit、deliverable、people reflection。</p>
	</div>
	<div class="card span-3">
	<h3>Owner / Due</h3>
	<div class="metric">Louis</div>
	<p class="muted">2026-05-27 前拍板命中定義與紅線。</p>
	</div>
	<div class="card span-3">
	<h3>Decision Risk</h3>
	<div class="metric amber">中</div>
	<p class="muted">zihrou、iron、Louis 工具選擇尚未完全統一。</p>
	</div>
	</section>

	<section class="grid">
	<div class="card span-8">
	<h2>D1 / D7 / D14 / D30 路徑</h2>
	<table>
	<thead><tr><th>節點</th><th>交付</th><th>驗收訊號</th></tr></thead>
	<tbody>
	<tr><td>D1</td><td>建立 prediction ledger 與 evidence matcher，先人工回填 10 筆 review prediction。</td><td>每筆有 claim、time window、expected signal、evidence source、label。</td></tr>
	<tr><td>D7</td><td>接入 signals/action_items/GitHub commit，自動產生 hit/miss/partial/unknown。</td><td>50 筆 prediction 有驗證結果，unknown 低於 25%。</td></tr>
	<tr><td>D14</td><td>將驗證結果接入 AI review 節奏，低命中類型自動派 correction task。</td><td>每週 review 能看到 calibration、false positive、false negative。</td></tr>
	<tr><td>D30</td><td>形成公司 AI 化 decision QA：預測、證據、派工、修正、治理完整閉環。</td><td>AI review 命中率、工具採用決策速度、逾期修正率每週可追。</td></tr>
	</tbody>
	</table>
	</div>
	<div class="card span-4">
	<h2>Purpose-to-Purpose E2E</h2>
	<ol>
	<li>原始目的：讓 AI review 真的改善公司 AI 化決策。</li>
	<li>產出物：驗證 cockpit、資料模型、比對規則、驗收測試。</li>
	<li>人採用：Louis 拍板紅線，zihrou/iron 回饋工具分歧訊號。</li>
	<li>指標改善：預測命中率、決策速度、派工準確率、風險提早發現率。</li>
	</ol>
	</div>
	</section>

	<section class="grid">
	<div class="card span-12">
	<h2>Verification Flow</h2>
	<div class="flow">
	<div><h3>1. Capture</h3><p>把 review 中的預測拆成 <span class="code">prediction_claims</span>，含 expected outcome 與時間窗。</p></div>
	<div><h3>2. Evidence</h3><p>收 signals、action items、commit、deliverable、people reflection，多來源都保留 source_ref。</p></div>
	<div><h3>3. Match</h3><p>用 rule + embedding/keyword candidate 產生 evidence_candidates 與 confidence。</p></div>
	<div><h3>4. Label</h3><p>輸出 hit / miss / partial / unknown，低信心進人工 review queue。</p></div>
	<div><h3>5. Act</h3><p>命中差的 prediction type 轉成 prompt 改寫、資料缺口或 owner 對焦任務。</p></div>
	</div>
	</div>
	</section>

	<section class="grid">
	<div class="card span-6">
	<h2>Evidence Ledger 範例</h2>
	<table>
	<thead><tr><th>Prediction</th><th>Evidence</th><th>Label</th><th>Action</th></tr></thead>
	<tbody>
	<tr><td>工具選擇未統一會拖慢系統化建置 2 週</td><td>action item 仍要求統一 AI 工具與同步節奏</td><td><span class="tag amber">partial</span></td><td>派 Louis 拍板工具標準</td></tr>
	<tr><td>AI 預測驗證模組會成為近期關注</td><td>GitHub commit 與 person_reflection 均指向 verification module</td><td><span class="tag green">hit</span></td><td>進入 D7 自動比對</td></tr>
	<tr><td>所有 action item 逾期皆因缺 owner</td><td>context 顯示部分 action 已有 owner，但缺 review cadence</td><td><span class="tag red">miss</span></td><td>修正 prompt：區分 owner 缺口與節奏缺口</td></tr>
	</tbody>
	</table>
	</div>
	<div class="card span-6">
	<h2>Human Capability Upgrade</h2>
	<ul>
	<li>Louis：看 AI review 的校準度，不只看敘述漂亮。</li>
	<li>zihrou：把分歧翻譯成方向、資源、授權三類缺口。</li>
	<li>iron：用 evidence ledger 對齊多線執行，而不是被催促。</li>
	<li>PLS：下一輪可根據 miss type 自動改 prompt、派資料任務或升級治理。</li>
	</ul>
	</div>
	</section>

	<section class="grid">
	<div class="card span-6">
	<h2>people_sync</h2>
	<div class="line-draft">LINE 草稿：
	Louis / zihrou / iron，PLS 已把「AI 預測驗證模組」整理成可落地驗證 cockpit。請在 2026-05-27 前回覆：
	1. accept：同意 hit/miss/partial/unknown 定義；
	2. revise：指出哪種預測不能這樣判；
	3. evidence_gap：補 signals/action items/commit 以外還要接的來源。
	驗收口徑：D7 前 50 筆 prediction 可自動產生 label，unknown 低於 25%，低信心進人工 review queue。</div>
	</div>
	<div class="card span-6">
	<h2>production_readiness</h2>
	<table>
	<thead><tr><th>Gate</th><th>狀態</th><th>證據</th></tr></thead>
	<tbody>
	<tr><td>主成果可打開</td><td><span class="tag green">PASS</span></td><td>本 HTML cockpit</td></tr>
	<tr><td>資料模型</td><td><span class="tag green">PASS</span></td><td>prediction/evidence/match/review/calibration tables</td></tr>
	<tr><td>權限/稽核</td><td><span class="tag green">PASS</span></td><td>source_ref、audit_actor、sensitive_level</td></tr>
	<tr><td>E2E 驗收</td><td><span class="tag blue">READY</span></td><td>D1/D7/D14/D30 gates</td></tr>
	<tr><td>下一輪升級</td><td><span class="tag violet">DEFINED</span></td><td>低命中 prediction type 自動派工</td></tr>
	</tbody>
	</table>
	</div>
	</section>
	</main>
	</body>
	</html>
節點	交付成果	驗收
D1	建立 prediction ledger、evidence matcher、人工回填 10 筆 review prediction。	每筆 prediction 有 owner、time window、expected signal、source_ref、label policy。
D7	接入 signals/action_items/GitHub commit，產生 50 筆自動 label。	unknown 低於 25%，low confidence 全部進 review queue。
D14	將驗證結果接入每週 AI review，低命中類型自動派 correction task。	review 顯示 calibration、false positive、false negative、miss_reason。
D30	形成公司 AI 化 decision QA system。	AI review 命中率、決策速度、派工準確率、逾期修正率每週可追。